From ae01f84b93b274e2f215bdf6d0b46435679b5f9a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 31 May 2010 18:45:11 +0000 Subject: powerpc: Optimise per cpu accesses on 64bit Now we dynamically allocate the paca array, it takes an extra load whenever we want to access another cpu's paca. One place we do that a lot is per cpu variables. A simple example: DEFINE_PER_CPU(unsigned long, vara); unsigned long test4(int cpu) { return per_cpu(vara, cpu); } This takes 4 loads, 5 if you include the actual load of the per cpu variable: ld r11,-32760(r30) # load address of paca pointer ld r9,-32768(r30) # load link address of percpu variable sldi r3,r29,9 # get offset into paca (each entry is 512 bytes) ld r0,0(r11) # load paca pointer add r3,r0,r3 # paca + offset ld r11,64(r3) # load paca[cpu].data_offset ldx r3,r9,r11 # load per cpu variable If we remove the ppc64 specific per_cpu_offset(), we get the generic one which indexes into a statically allocated array. This removes one load and one add: ld r11,-32760(r30) # load address of __per_cpu_offset ld r9,-32768(r30) # load link address of percpu variable sldi r3,r29,3 # get offset into __per_cpu_offset (each entry 8 bytes) ldx r11,r11,r3 # load __per_cpu_offset[cpu] ldx r3,r9,r11 # load per cpu variable Having all the offsets in one array also helps when iterating over a per cpu variable across a number of cpus, such as in the scheduler. Before we would need to load one paca cacheline when calculating each per cpu offset. Now we have 16 (128 / sizeof(long)) per cpu offsets in each cacheline. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/setup_64.c') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 643dcac40fcb..c352f322dbdd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -600,6 +600,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + void __init setup_per_cpu_areas(void) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -624,8 +627,10 @@ void __init setup_per_cpu_areas(void) panic("cannot initialize percpu area (err=%d)", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) - paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; + paca[cpu].data_offset = __per_cpu_offset[cpu]; + } } #endif -- cgit From fc53b4202e61c7e9008c241933ae282aab8a6082 Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Wed, 7 Jul 2010 21:55:37 +0000 Subject: powerpc/kexec: Switch to a static PACA on the way out With dynamic PACAs, the kexecing CPU's PACA won't lie within the kernel static data and there is a chance that something may stomp it when preparing to kexec. This patch switches this final CPU to a static PACA just before we pull the switch. Signed-off-by: Matt Evans Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/powerpc/kernel/setup_64.c') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c352f322dbdd..96e662c1d46b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -142,16 +142,6 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -/* Put the paca pointer into r13 and SPRG_PACA */ -static void __init setup_paca(struct paca_struct *new_paca) -{ - local_paca = new_paca; - mtspr(SPRN_SPRG_PACA, local_paca); -#ifdef CONFIG_PPC_BOOK3E - mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); -#endif -} - /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of -- cgit From 954e6da54b2f3a5e2634312db800bc1395c509ee Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 5 Aug 2010 07:42:11 +0000 Subject: powerpc: Correct smt_enabled=X boot option for > 2 threads per core The 'smt_enabled=X' boot option does not handle values of X > 2. For Power 7 processors with smt modes of 0,1,2,3, and 4 this does not work. This patch allows the smt_enabled option to be set to any value limited to a max equal to the number of threads per core. Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 63 ++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/kernel/setup_64.c') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1bee4b68fa45..e72690ec9b87 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -95,7 +95,7 @@ int ucache_bsize; #ifdef CONFIG_SMP -static int smt_enabled_cmdline; +static char *smt_enabled_cmdline; /* Look for ibm,smt-enabled OF option */ static void check_smt_enabled(void) @@ -103,37 +103,46 @@ static void check_smt_enabled(void) struct device_node *dn; const char *smt_option; - /* Allow the command line to overrule the OF option */ - if (smt_enabled_cmdline) - return; - - dn = of_find_node_by_path("/options"); - - if (dn) { - smt_option = of_get_property(dn, "ibm,smt-enabled", NULL); + /* Default to enabling all threads */ + smt_enabled_at_boot = threads_per_core; - if (smt_option) { - if (!strcmp(smt_option, "on")) - smt_enabled_at_boot = 1; - else if (!strcmp(smt_option, "off")) - smt_enabled_at_boot = 0; - } - } + /* Allow the command line to overrule the OF option */ + if (smt_enabled_cmdline) { + if (!strcmp(smt_enabled_cmdline, "on")) + smt_enabled_at_boot = threads_per_core; + else if (!strcmp(smt_enabled_cmdline, "off")) + smt_enabled_at_boot = 0; + else { + long smt; + int rc; + + rc = strict_strtol(smt_enabled_cmdline, 10, &smt); + if (!rc) + smt_enabled_at_boot = + min(threads_per_core, (int)smt); + } + } else { + dn = of_find_node_by_path("/options"); + if (dn) { + smt_option = of_get_property(dn, "ibm,smt-enabled", + NULL); + + if (smt_option) { + if (!strcmp(smt_option, "on")) + smt_enabled_at_boot = threads_per_core; + else if (!strcmp(smt_option, "off")) + smt_enabled_at_boot = 0; + } + + of_node_put(dn); + } + } } /* Look for smt-enabled= cmdline option */ static int __init early_smt_enabled(char *p) { - smt_enabled_cmdline = 1; - - if (!p) - return 0; - - if (!strcmp(p, "on") || !strcmp(p, "1")) - smt_enabled_at_boot = 1; - else if (!strcmp(p, "off") || !strcmp(p, "0")) - smt_enabled_at_boot = 0; - + smt_enabled_cmdline = p; return 0; } early_param("smt-enabled", early_smt_enabled); @@ -380,8 +389,8 @@ void __init setup_system(void) */ xmon_setup(); - check_smt_enabled(); smp_setup_cpu_maps(); + check_smt_enabled(); #ifdef CONFIG_SMP /* Release secondary cpus out of their spinloops at 0x60 now that -- cgit