From 405d967dc70002991f8fc35c20e0d3cbc7614f63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:38 +0900 Subject: linker script: throw away .discard section x86 throws away .discard section but no other archs do. Also, .discard is not thrown away while linking modules. Make every arch and module linking throw it away. This will be used to define dummy variables for percpu declarations and definitions. This patch is based on Ivan Kokshaysky's alpha percpu patch. [ Impact: always throw away everything in .discard ] Signed-off-by: Tejun Heo Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Russell King Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Yoshinori Sato Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Kyle McMartin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. Miller Cc: Jeff Dike Cc: Chris Zankel Cc: Rusty Russell Cc: Ingo Molnar --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8ef8a14abc95..7fca9355fd3d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS /* Sections to be discarded. */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) EXIT_DATA } -- cgit From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. This led to linker script for each arch implementing its own /DISCARD/ definition, which makes maintaining tedious and adding new entries error-prone. This patch makes all linker scripts to move discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, archs can include default discarded sections by including them earlier in the linker script. ia64 is notable because it first throws away some ia64 specific subsections and then include the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze. Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/powerpc/kernel/vmlinux.lds.S | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fca9355fd3d..244e3658983c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -37,13 +37,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - EXIT_DATA - } - . = KERNELBASE; /* @@ -299,4 +292,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; PROVIDE32 (end = .); + + /* Sections to be discarded. 
*/ + DISCARDS } -- cgit From a42548a18866e87092db93b771e6c5b060d78401 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:29 +0200 Subject: cputime: Optimize jiffies_to_cputime(1) For powerpc with CONFIG_VIRT_CPU_ACCOUNTING jiffies_to_cputime(1) is not compile time constant and run time calculations are quite expensive. To optimize we use precomputed value. For all other architectures is is preprocessor definition. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-5-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/time.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..211d7b0cd370 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -193,6 +193,8 @@ EXPORT_SYMBOL(__cputime_clockt_factor); DEFINE_PER_CPU(unsigned long, cputime_last_delta); DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); +cputime_t cputime_one_jiffy; + static void calc_cputime_factors(void) { struct div_result res; @@ -500,6 +502,7 @@ static int __init iSeries_tb_recal(void) tb_to_xs = divres.result_low; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; vdso_data->tb_to_xs = tb_to_xs; + setup_cputime_one_jiffy(); } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -945,6 +948,7 @@ void __init time_init(void) tb_ticks_per_usec = ppc_tb_freq / 1000000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); calc_cputime_factors(); + setup_cputime_one_jiffy(); /* * Calculate the length of each tick in ns. It will not be -- cgit From c2a7e818019f20a5cf7fb26a6eb59e212e6c0cd8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:53 +0900 Subject: powerpc64: convert to dynamic percpu allocator Now that percpu allows arbitrary embedding of the first chunk, powerpc64 can easily be converted to dynamic percpu allocator. Convert it. powerpc supports several large page sizes. Cap atom_size at 1M. There isn't much to gain by going above that anyway. 
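As a rough illustration (not part of this patch, and the helper name is invented), the offset that setup_per_cpu_areas() below stores in paca[cpu].data_offset is what every per-CPU access adds to a variable's address, which is why generic DEFINE_PER_CPU users are unaffected by the switch to the embed-first-chunk allocator; the real accessor lives in asm/percpu.h:

	/* Sketch only: per-CPU access after the conversion. */
	static inline void *ppc64_this_cpu_ptr_sketch(void *ptr)
	{
		/* data_offset was set to delta + pcpu_unit_offsets[cpu] below */
		return (char *)ptr + get_paca()->data_offset;
	}
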
Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 61 +++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1f6816003ebe..aa6e4500635f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -569,25 +570,53 @@ void cpu_die(void) } #ifdef CONFIG_SMP -void __init setup_per_cpu_areas(void) +#define PCPU_DYN_SIZE () + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - int i; - unsigned long size; - char *ptr; - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); -#ifdef CONFIG_MODULES - if (size < PERCPU_ENOUGH_ROOM) - size = PERCPU_ENOUGH_ROOM; -#endif + return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, + __pa(MAX_DMA_ADDRESS)); +} - for_each_possible_cpu(i) { - ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} - paca[i].data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - } +static int pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + if (cpu_to_node(from) == cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + +void __init setup_per_cpu_areas(void) +{ + const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t atom_size; + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = 1 << 20; + + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; } #endif -- cgit From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) have a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system change the read_persistent_clock function to return a struct timespec. 
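As a hedged illustration of what the finer-grained interface allows (the helper name is hypothetical; the powerpc implementation below still reports whole seconds and sets tv_nsec to 0), an architecture whose persistent clock has sub-second resolution could now do roughly:

	void read_persistent_clock(struct timespec *ts)
	{
		u32 rem;
		u64 ns = arch_read_persistent_ns();	/* hypothetical nanosecond RTC read */

		ts->tv_sec  = div_u64_rem(ns, NSEC_PER_SEC, &rem);
		ts->tv_nsec = rem;
	}
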
Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/time.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..ad63f30fe3da 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) if (!ppc_md.get_rtc_time) return 0; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ -- cgit From 30d0b3682887a81f0335b42f20116fd40d743371 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Jul 2009 20:53:51 +0000 Subject: powerpc: Move 64bit VDSO to improve context switch performance On 64bit applications the VDSO is the only thing in segment 0. Since the VDSO is position independent we can remove the hint and let get_unmapped_area pick an area. This will mean the vdso will be near other mmaps and will share an SLB entry: 10000000-10001000 r-xp 00000000 08:06 5778459 /root/context_switch_64 10010000-10011000 r--p 00000000 08:06 5778459 /root/context_switch_64 10011000-10012000 rw-p 00001000 08:06 5778459 /root/context_switch_64 fffa92ae000-fffa92b0000 rw-p 00000000 00:00 0 fffa92b0000-fffa9453000 r-xp 00000000 08:06 4334051 /lib64/power6/libc-2.9.so fffa9453000-fffa9462000 ---p 001a3000 08:06 4334051 /lib64/power6/libc-2.9.so fffa9462000-fffa9466000 r--p 001a2000 08:06 4334051 /lib64/power6/libc-2.9.so fffa9466000-fffa947c000 rw-p 001a6000 08:06 4334051 /lib64/power6/libc-2.9.so fffa947c000-fffa9480000 rw-p 00000000 00:00 0 fffa9480000-fffa94a8000 r-xp 00000000 08:06 4333852 /lib64/ld-2.9.so fffa94b3000-fffa94b4000 rw-p 00000000 00:00 0 fffa94b4000-fffa94b7000 r-xp 00000000 00:00 0 [vdso] <----- here I am fffa94b7000-fffa94b8000 r--p 00027000 08:06 4333852 /lib64/ld-2.9.so fffa94b8000-fffa94bb000 rw-p 00028000 08:06 4333852 /lib64/ld-2.9.so fffa94bb000-fffa94bc000 rw-p 00000000 00:00 0 fffe4c10000-fffe4c25000 rw-p 00000000 00:00 0 [stack] On a microbenchmark that bounces a token between two 64bit processes over pipes and calls gettimeofday each iteration (to access the VDSO), our context switch rate goes from 268k to 277k ctx switches/sec (tested on a 4GHz POWER6). Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vdso.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ad06d5c75b15..a0abce251d0a 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -203,7 +203,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } else { vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; - vdso_base = VDSO64_MBASE; + /* + * On 64bit we don't have a preferred map address. This + * allows get_unmapped_area to find an area near other mmaps + * and most likely share a SLB entry. 
+ */ + vdso_base = 0; } #else vdso_pagelist = vdso32_pagelist; -- cgit From 8aa34ab8b2dc96ca6c4feecfb87ed13f0d40ef98 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 Jul 2009 20:52:52 +0000 Subject: powerpc: Rename exception.h to exception-64s.h The file include/asm/exception.h contains definitions that are specific to exception handling on 64-bit server type processors. This renames the file to exception-64s.h to reflect that fact and avoid confusion. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 2 ++ arch/powerpc/kernel/head_64.S | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eb898112e577..72644cf22cac 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -12,6 +12,8 @@ * */ +#include + /* * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 012505ebd9f9..9196ef36d433 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -36,7 +36,6 @@ #include #include #include -#include #include /* The physical memory is layed out such that the secondary processor -- cgit From ee43eb788b3a06425fffb912677e2e1c8b00dd3b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 Jul 2009 20:52:54 +0000 Subject: powerpc: Use names rather than numbers for SPRGs (v2) The kernel uses SPRG registers for various purposes, typically in low level assembly code as scratch registers or to hold per-cpu global infos such as the PACA or the current thread_info pointer. We want to be able to easily shuffle the usage of those registers as some implementations have specific constraints realted to some of them, for example, some have userspace readable aliases, etc.. and the current choice isn't always the best. This patch should not change any code generation, and replaces the usage of SPRN_SPRGn everywhere in the kernel with a named replacement and adds documentation next to the definition of the names as to what those are used for on each processor family. The only parts that still use the original numbers are bits of KVM or suspend/resume code that just blindly needs to save/restore all the SPRGs. 
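To make the substitutions in the diffs below easier to follow, here is a reconstruction, for 64-bit server only, of the aliases this patch introduces; it is derived purely from the replacements visible in exceptions-64s.S and is not copied from asm/reg.h, where the authoritative definitions and their documentation live:

	/* Reconstructed for illustration from the 64-bit server changes below */
	#define SPRN_SPRG_SCRATCH0	SPRN_SPRG1	/* temp, saves a GPR on exception entry */
	#define SPRN_SPRG_SCRATCH1	SPRN_SPRG2	/* temp, saves a second GPR */
	#define SPRN_SPRG_PACA		SPRN_SPRG3	/* virtual address of the paca */

SPRG0 remains reserved for the hypervisor on these processors, as noted in the comment block the patch removes from exceptions-64s.S.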
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_6xx.S | 2 +- arch/powerpc/kernel/entry_32.S | 20 +++--- arch/powerpc/kernel/entry_64.S | 4 +- arch/powerpc/kernel/exceptions-64s.S | 44 +++++-------- arch/powerpc/kernel/fpu.S | 2 +- arch/powerpc/kernel/head_32.S | 40 +++++------ arch/powerpc/kernel/head_40x.S | 124 +++++++++++++++++------------------ arch/powerpc/kernel/head_44x.S | 56 ++++++++-------- arch/powerpc/kernel/head_64.S | 14 ++-- arch/powerpc/kernel/head_8xx.S | 13 ++-- arch/powerpc/kernel/head_booke.h | 50 ++++++-------- arch/powerpc/kernel/head_fsl_booke.S | 60 ++++++++--------- arch/powerpc/kernel/setup_64.c | 4 +- arch/powerpc/kernel/vector.S | 2 +- 14 files changed, 207 insertions(+), 228 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index 1e9949e68856..55cba4a8a959 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -21,7 +21,7 @@ _GLOBAL(__setup_cpu_603) mflr r4 BEGIN_MMU_FTR_SECTION li r10,0 - mtspr SPRN_SPRG4,r10 /* init SW LRU tracking */ + mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) BEGIN_FTR_SECTION bl __init_fpu_registers diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3cadba60a4b6..1175a8539e6c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -88,7 +88,7 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,_SRR1(r11) - mfspr r8,SPRN_SPRG3 + mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,SAVED_KSP_LIMIT(r11) rlwimi r0,r1,0,0,(31-THREAD_SHIFT) @@ -108,7 +108,7 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,crit_srr1@l(0) - mfspr r8,SPRN_SPRG3 + mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,saved_ksp_limit@l(0) rlwimi r0,r1,0,0,(31-THREAD_SHIFT) @@ -138,7 +138,7 @@ transfer_to_handler: mfspr r2,SPRN_XER stw r12,_CTR(r11) stw r2,_XER(r11) - mfspr r12,SPRN_SPRG3 + mfspr r12,SPRN_SPRG_THREAD addi r2,r12,-THREAD tovirt(r2,r2) /* set r2 to current */ beq 2f /* if from user, fix up THREAD.regs */ @@ -680,7 +680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) tophys(r0,r4) CLR_TOP32(r0) - mtspr SPRN_SPRG3,r0 /* Update current THREAD phys addr */ + mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ lwz r1,KSP(r4) /* Load new stack pointer */ /* save the old current 'last' for return value */ @@ -1057,7 +1057,7 @@ exc_exit_restart_end: #ifdef CONFIG_40x .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG3 + mfspr r9,SPRN_SPRG_THREAD lis r10,saved_ksp_limit@ha; lwz r10,saved_ksp_limit@l(r10); tovirt(r9,r9); @@ -1074,7 +1074,7 @@ ret_from_crit_exc: #ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG3 + mfspr r9,SPRN_SPRG_THREAD lwz r10,SAVED_KSP_LIMIT(r1) stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); @@ -1083,7 +1083,7 @@ ret_from_crit_exc: .globl ret_from_debug_exc ret_from_debug_exc: - mfspr r9,SPRN_SPRG3 + mfspr r9,SPRN_SPRG_THREAD lwz r10,SAVED_KSP_LIMIT(r1) stw r10,KSP_LIMIT(r9) lwz r9,THREAD_INFO-THREAD(r9) @@ -1097,7 +1097,7 @@ ret_from_debug_exc: .globl ret_from_mcheck_exc ret_from_mcheck_exc: - mfspr r9,SPRN_SPRG3 + mfspr r9,SPRN_SPRG_THREAD lwz r10,SAVED_KSP_LIMIT(r1) stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); @@ -1255,7 +1255,7 @@ _GLOBAL(enter_rtas) MTMSRD(r0) /* don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 - mtspr SPRN_SPRG2,r7 + mtspr SPRN_SPRG_RTAS,r7 mtspr SPRN_SRR0,r8 mtspr 
SPRN_SRR1,r9 RFI @@ -1265,7 +1265,7 @@ _GLOBAL(enter_rtas) FIX_SRR1(r9,r0) addi r1,r1,INT_FRAME_SIZE li r0,0 - mtspr SPRN_SPRG2,r0 + mtspr SPRN_SPRG_RTAS,r0 mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI /* return to caller */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 43e073477c34..dbf0e3115611 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -762,7 +762,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ - mfspr r4,SPRN_SPRG3 /* Get PACA */ + mfspr r4,SPRN_SPRG_PACA /* Get PACA */ clrldi r4,r4,2 /* convert to realmode address */ bcl 20,31,$+4 @@ -793,7 +793,7 @@ _STATIC(rtas_restore_regs) REST_8GPRS(14, r1) /* Restore the non-volatiles */ REST_10GPRS(22, r1) /* ditto */ - mfspr r13,SPRN_SPRG3 + mfspr r13,SPRN_SPRG_PACA ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 72644cf22cac..4e9640cc0563 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -24,18 +24,6 @@ * 0x8000 - : Early init and support code */ - -/* - * SPRG Usage - * - * Register Definition - * - * SPRG0 reserved for hypervisor - * SPRG1 temp - used to save gpr - * SPRG2 temp - used to save gpr - * SPRG3 virt addr of paca - */ - /* * This is the start of the interrupt handlers for pSeries * This code runs with relocation off. @@ -53,16 +41,16 @@ __start_interrupts: . = 0x200 _machine_check_pSeries: HMT_MEDIUM - mtspr SPRN_SPRG1,r13 /* save r13 */ + mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) . = 0x300 .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM - mtspr SPRN_SPRG1,r13 + mtspr SPRN_SPRG_SCRATCH0,r13 BEGIN_FTR_SECTION - mtspr SPRN_SPRG2,r12 + mtspr SPRN_SPRG_SCRATCH1,r12 mfspr r13,SPRN_DAR mfspr r12,SPRN_DSISR srdi r13,r13,60 @@ -71,7 +59,7 @@ BEGIN_FTR_SECTION cmpwi r13,0x2c beq do_stab_bolted_pSeries mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG2 + mfspr r12,SPRN_SPRG_SCRATCH1 END_FTR_SECTION_IFCLR(CPU_FTR_SLB) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) @@ -79,8 +67,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM - mtspr SPRN_SPRG1,r13 - mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + mtspr SPRN_SPRG_SCRATCH0,r13 + mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -93,7 +81,7 @@ data_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG1 + mfspr r10,SPRN_SPRG_SCRATCH0 std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -117,8 +105,8 @@ data_access_slb_pSeries: .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM - mtspr SPRN_SPRG1,r13 - mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + mtspr SPRN_SPRG_SCRATCH0,r13 + mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -131,7 +119,7 @@ instruction_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG1 + mfspr r10,SPRN_SPRG_SCRATCH0 std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -161,7 +149,7 @@ 
BEGIN_FTR_SECTION beq- 1f END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 - mfspr r13,SPRN_SPRG3 + mfspr r13,SPRN_SPRG_PACA mfspr r11,SPRN_SRR0 ld r12,PACAKBASE(r13) ld r10,PACAKMSR(r13) @@ -230,14 +218,14 @@ masked_interrupt: rotldi r10,r10,16 mtspr SPRN_SRR1,r10 ld r10,PACA_EXGEN+EX_R10(r13) - mfspr r13,SPRN_SPRG1 + mfspr r13,SPRN_SPRG_SCRATCH0 rfid b . .align 7 do_stab_bolted_pSeries: mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG2 + mfspr r12,SPRN_SPRG_SCRATCH1 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) #ifdef CONFIG_PPC_PSERIES @@ -248,14 +236,14 @@ do_stab_bolted_pSeries: .align 7 system_reset_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG1,r13 /* save r13 */ + mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) .globl machine_check_fwnmi .align 7 machine_check_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG1,r13 /* save r13 */ + mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) #endif /* CONFIG_PPC_PSERIES */ @@ -270,7 +258,7 @@ slb_miss_user_pseries: std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG1 + mfspr r10,SPRG_SCRATCH0 ld r11,PACA_EXSLB+EX_R9(r13) ld r12,PACA_EXSLB+EX_R3(r13) std r10,PACA_EXGEN+EX_R13(r13) diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2436df33c6f4..fc8f5b14019c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif /* CONFIG_SMP */ /* enable use of FP after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index fc2132942754..829c3fe7c5a2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -244,8 +244,8 @@ __secondary_hold_acknowledge: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG0,r10; \ - mtspr SPRN_SPRG1,r11; \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ mfcr r10; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 @@ -255,7 +255,7 @@ __secondary_hold_acknowledge: andi. r11,r11,MSR_PR; \ tophys(r11,r1); /* use tophys(r1) if kernel */ \ beq 1f; \ - mfspr r11,SPRN_SPRG3; \ + mfspr r11,SPRN_SPRG_THREAD; \ lwz r11,THREAD_INFO-THREAD(r11); \ addi r11,r11,THREAD_SIZE; \ tophys(r11,r11); \ @@ -267,9 +267,9 @@ __secondary_hold_acknowledge: stw r10,_CCR(r11); /* save registers */ \ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG0; \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG1; \ + mfspr r12,SPRN_SPRG_SCRATCH1; \ stw r12,GPR11(r11); \ mflr r10; \ stw r10,_LINK(r11); \ @@ -355,11 +355,11 @@ i##n: \ * -- paulus. */ . 
= 0x200 - mtspr SPRN_SPRG0,r10 - mtspr SPRN_SPRG1,r11 + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 mfcr r10 #ifdef CONFIG_PPC_CHRP - mfspr r11,SPRN_SPRG2 + mfspr r11,SPRN_SPRG_RTAS cmpwi 0,r11,0 bne 7f #endif /* CONFIG_PPC_CHRP */ @@ -367,7 +367,7 @@ i##n: \ 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP - mfspr r4,SPRN_SPRG2 + mfspr r4,SPRN_SPRG_RTAS cmpwi cr1,r4,0 bne cr1,1f #endif @@ -485,7 +485,7 @@ InstructionTLBMiss: mfspr r3,SPRN_IMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2,SPRN_SPRG3 + mfspr r2,SPRN_SPRG_THREAD li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ lwz r2,PGDIR(r2) bge- 112f @@ -559,7 +559,7 @@ DataLoadTLBMiss: mfspr r3,SPRN_DMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2,SPRN_SPRG3 + mfspr r2,SPRN_SPRG_THREAD li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ lwz r2,PGDIR(r2) bge- 112f @@ -598,12 +598,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION li r0,1 - mfspr r1,SPRN_SPRG4 + mfspr r1,SPRN_SPRG_603_LRU rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ slw r0,r0,r2 xor r1,r0,r1 srw r0,r1,r2 - mtspr SPRN_SPRG4,r1 + mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 @@ -643,7 +643,7 @@ DataStoreTLBMiss: mfspr r3,SPRN_DMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2,SPRN_SPRG3 + mfspr r2,SPRN_SPRG_THREAD li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ lwz r2,PGDIR(r2) bge- 112f @@ -678,12 +678,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION li r0,1 - mfspr r1,SPRN_SPRG4 + mfspr r1,SPRN_SPRG_603_LRU rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ slw r0,r0,r2 xor r1,r0,r1 srw r0,r1,r2 - mtspr SPRN_SPRG4,r1 + mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 @@ -864,9 +864,9 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ CLR_TOP32(r4) - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 li r3,0 - mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -947,9 +947,9 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ CLR_TOP32(r4) - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 li r3,0 - mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ /* stack */ lis r1,init_thread_union@ha diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 0c96911d4299..a90625f9b485 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -103,21 +103,21 @@ _ENTRY(saved_ksp_limit) /* * Exception vector entry code. This code runs with address translation - * turned off (i.e. using physical addresses). We assume SPRG3 has the - * physical address of the current task thread_struct. + * turned off (i.e. using physical addresses). We assume SPRG_THREAD has + * the physical address of the current task thread_struct. * Note that we have to have decremented r1 before we write to any fields * of the exception frame, since a critical interrupt could occur at any * time, and it will write to the area immediately below the current r1. 
*/ #define NORMAL_EXCEPTION_PROLOG \ - mtspr SPRN_SPRG0,r10; /* save two registers to work with */\ - mtspr SPRN_SPRG1,r11; \ - mtspr SPRN_SPRG2,r1; \ + mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mtspr SPRN_SPRG_SCRATCH2,r1; \ mfcr r10; /* save CR in r10 for now */\ mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ andi. r11,r11,MSR_PR; \ beq 1f; \ - mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ + mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ addi r1,r1,THREAD_SIZE; \ 1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ @@ -125,13 +125,13 @@ _ENTRY(saved_ksp_limit) stw r10,_CCR(r11); /* save various registers */\ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG0; \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG1; \ + mfspr r12,SPRN_SPRG_SCRATCH1; \ stw r12,GPR11(r11); \ mflr r10; \ stw r10,_LINK(r11); \ - mfspr r10,SPRN_SPRG2; \ + mfspr r10,SPRN_SPRG_SCRATCH2; \ mfspr r12,SPRN_SRR0; \ stw r10,GPR1(r11); \ mfspr r9,SPRN_SRR1; \ @@ -160,7 +160,7 @@ _ENTRY(saved_ksp_limit) lwz r11,critirq_ctx@l(r11); \ beq 1f; \ /* COMING FROM USER MODE */ \ - mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ + mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ 1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ tophys(r11,r11); \ @@ -265,8 +265,8 @@ label: * and exit. Otherwise, we call heavywight functions to do the work. */ START_EXCEPTION(0x0300, DataStorage) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH1, r11 #ifdef CONFIG_403GCX stw r12, 0(r0) stw r9, 4(r0) @@ -275,12 +275,12 @@ label: stw r11, 8(r0) stw r12, 12(r0) #else - mtspr SPRN_SPRG4, r12 - mtspr SPRN_SPRG5, r9 + mtspr SPRN_SPRG_SCRATCH3, r12 + mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r11 mfspr r12, SPRN_PID - mtspr SPRN_SPRG7, r11 - mtspr SPRN_SPRG6, r12 + mtspr SPRN_SPRG_SCRATCH6, r11 + mtspr SPRN_SPRG_SCRATCH5, r12 #endif /* First, check if it was a zone fault (which means a user @@ -308,7 +308,7 @@ label: /* Get the PGD for the current thread. */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) 4: tophys(r11, r11) @@ -355,15 +355,15 @@ label: lwz r9, 4(r0) lwz r12, 0(r0) #else - mfspr r12, SPRN_SPRG6 - mfspr r11, SPRN_SPRG7 + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 mtspr SPRN_PID, r12 mtcr r11 - mfspr r9, SPRN_SPRG5 - mfspr r12, SPRN_SPRG4 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 #endif - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 PPC405_ERR77_SYNC rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ @@ -380,15 +380,15 @@ label: lwz r9, 4(r0) lwz r12, 0(r0) #else - mfspr r12, SPRN_SPRG6 - mfspr r11, SPRN_SPRG7 + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 mtspr SPRN_PID, r12 mtcr r11 - mfspr r9, SPRN_SPRG5 - mfspr r12, SPRN_SPRG4 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 #endif - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 b DataAccess /* @@ -466,8 +466,8 @@ label: * load TLB entries from the page table if they exist. 
*/ START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH1, r11 #ifdef CONFIG_403GCX stw r12, 0(r0) stw r9, 4(r0) @@ -476,12 +476,12 @@ label: stw r11, 8(r0) stw r12, 12(r0) #else - mtspr SPRN_SPRG4, r12 - mtspr SPRN_SPRG5, r9 + mtspr SPRN_SPRG_SCRATCH3, r12 + mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r11 mfspr r12, SPRN_PID - mtspr SPRN_SPRG7, r11 - mtspr SPRN_SPRG6, r12 + mtspr SPRN_SPRG_SCRATCH6, r11 + mtspr SPRN_SPRG_SCRATCH5, r12 #endif mfspr r10, SPRN_DEAR /* Get faulting address */ @@ -500,7 +500,7 @@ label: /* Get the PGD for the current thread. */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) 4: tophys(r11, r11) @@ -550,15 +550,15 @@ label: lwz r9, 4(r0) lwz r12, 0(r0) #else - mfspr r12, SPRN_SPRG6 - mfspr r11, SPRN_SPRG7 + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 mtspr SPRN_PID, r12 mtcr r11 - mfspr r9, SPRN_SPRG5 - mfspr r12, SPRN_SPRG4 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 #endif - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 b DataAccess /* 0x1200 - Instruction TLB Miss Exception @@ -566,8 +566,8 @@ label: * registers and bailout to a different point. */ START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH1, r11 #ifdef CONFIG_403GCX stw r12, 0(r0) stw r9, 4(r0) @@ -576,12 +576,12 @@ label: stw r11, 8(r0) stw r12, 12(r0) #else - mtspr SPRN_SPRG4, r12 - mtspr SPRN_SPRG5, r9 + mtspr SPRN_SPRG_SCRATCH3, r12 + mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r11 mfspr r12, SPRN_PID - mtspr SPRN_SPRG7, r11 - mtspr SPRN_SPRG6, r12 + mtspr SPRN_SPRG_SCRATCH6, r11 + mtspr SPRN_SPRG_SCRATCH5, r12 #endif mfspr r10, SPRN_SRR0 /* Get faulting address */ @@ -600,7 +600,7 @@ label: /* Get the PGD for the current thread. */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) 4: tophys(r11, r11) @@ -650,15 +650,15 @@ label: lwz r9, 4(r0) lwz r12, 0(r0) #else - mfspr r12, SPRN_SPRG6 - mfspr r11, SPRN_SPRG7 + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 mtspr SPRN_PID, r12 mtcr r11 - mfspr r9, SPRN_SPRG5 - mfspr r12, SPRN_SPRG4 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 #endif - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) @@ -803,15 +803,15 @@ finish_tlb_load: lwz r9, 4(r0) lwz r12, 0(r0) #else - mfspr r12, SPRN_SPRG6 - mfspr r11, SPRN_SPRG7 + mfspr r12, SPRN_SPRG_SCRATCH5 + mfspr r11, SPRN_SPRG_SCRATCH6 mtspr SPRN_PID, r12 mtcr r11 - mfspr r9, SPRN_SPRG5 - mfspr r12, SPRN_SPRG4 + mfspr r9, SPRN_SPRG_SCRATCH4 + mfspr r12, SPRN_SPRG_SCRATCH3 #endif - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 PPC405_ERR77_SYNC rfi /* Should sync shadow TLBs */ b . 
/* prevent prefetch past rfi */ @@ -835,7 +835,7 @@ start_here: /* ptr to phys current thread */ tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 /* stack */ lis r1,init_thread_union@ha diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 18d8a1677c4d..656cfb2d6666 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -239,7 +239,7 @@ skpinv: addi r4,r4,1 /* Increment */ /* ptr to current thread */ addi r4,r2,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 /* stack */ lis r1,init_thread_union@h @@ -350,12 +350,12 @@ interrupt_base: /* Data TLB Error Interrupt */ START_EXCEPTION(DataTLBError) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 - mtspr SPRN_SPRG4W, r12 - mtspr SPRN_SPRG5W, r13 + mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1, r11 + mtspr SPRN_SPRG_WSCRATCH2, r12 + mtspr SPRN_SPRG_WSCRATCH3, r13 mfcr r11 - mtspr SPRN_SPRG7W, r11 + mtspr SPRN_SPRG_WSCRATCH4, r11 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -374,7 +374,7 @@ interrupt_base: /* Get the PGD for the current thread */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) /* Load PID into MMUCR TID */ @@ -446,12 +446,12 @@ tlb_44x_patch_hwater_D: /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r11, SPRN_SPRG7R + mfspr r11, SPRN_SPRG_RSCRATCH4 mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 b DataStorage /* Instruction TLB Error Interrupt */ @@ -461,12 +461,12 @@ tlb_44x_patch_hwater_D: * to a different point. */ START_EXCEPTION(InstructionTLBError) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 - mtspr SPRN_SPRG4W, r12 - mtspr SPRN_SPRG5W, r13 + mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1, r11 + mtspr SPRN_SPRG_WSCRATCH2, r12 + mtspr SPRN_SPRG_WSCRATCH3, r13 mfcr r11 - mtspr SPRN_SPRG7W, r11 + mtspr SPRN_SPRG_WSCRATCH4, r11 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -485,7 +485,7 @@ tlb_44x_patch_hwater_D: /* Get the PGD for the current thread */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) /* Load PID into MMUCR TID */ @@ -542,12 +542,12 @@ tlb_44x_patch_hwater_I: /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r11, SPRN_SPRG7R + mfspr r11, SPRN_SPRG_RSCRATCH4 mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage /* Debug Interrupt */ @@ -593,12 +593,12 @@ finish_tlb_load: /* Done...restore registers and get out of here. 
*/ - mfspr r11, SPRN_SPRG7R + mfspr r11, SPRN_SPRG_RSCRATCH4 mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 rfi /* Force context change */ /* diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 9196ef36d433..0552f01041ab 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -195,7 +195,7 @@ _GLOBAL(generic_secondary_smp_init) mr r3,r24 /* not found, copy phys to r3 */ b .kexec_wait /* next kernel might do better */ -2: mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ +2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */ /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 3: HMT_LOW @@ -484,7 +484,7 @@ _GLOBAL(pmac_secondary_start) LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ - mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) @@ -502,10 +502,10 @@ _GLOBAL(pmac_secondary_start) * 1. Processor number * 2. Segment table pointer (virtual address) * On entry the following are set: - * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries - * r24 = cpu# (in Linux terms) - * r13 = paca virtual address - * SPRG3 = paca virtual address + * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries + * r24 = cpu# (in Linux terms) + * r13 = paca virtual address + * SPRG_PACA = paca virtual address */ .globl __secondary_start __secondary_start: @@ -641,7 +641,7 @@ _INIT_STATIC(start_here_multiplatform) /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl .early_setup /* also sets r13 and SPRG3 */ + bl .early_setup /* also sets r13 and SPRG_PACA */ LOAD_REG_ADDR(r3, .start_here_common) ld r4,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52ff8c53b93c..6ded19d01891 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -110,8 +110,8 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG0,r10; \ - mtspr SPRN_SPRG1,r11; \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ mfcr r10; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 @@ -121,7 +121,7 @@ turn_on_mmu: andi. r11,r11,MSR_PR; \ tophys(r11,r1); /* use tophys(r1) if kernel */ \ beq 1f; \ - mfspr r11,SPRN_SPRG3; \ + mfspr r11,SPRN_SPRG_THREAD; \ lwz r11,THREAD_INFO-THREAD(r11); \ addi r11,r11,THREAD_SIZE; \ tophys(r11,r11); \ @@ -133,9 +133,9 @@ turn_on_mmu: stw r10,_CCR(r11); /* save registers */ \ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG0; \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG1; \ + mfspr r12,SPRN_SPRG_SCRATCH1; \ stw r12,GPR11(r11); \ mflr r10; \ stw r10,_LINK(r11); \ @@ -603,8 +603,9 @@ start_here: /* ptr to phys current thread */ tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 li r3,0 + /* XXX What is that for ? 
SPRG2 appears otherwise unused on 8xx */ mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */ /* stack */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 5f9febc8d143..50504ae39cb7 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -20,14 +20,14 @@ #endif #define NORMAL_EXCEPTION_PROLOG \ - mtspr SPRN_SPRG0,r10; /* save two registers to work with */\ - mtspr SPRN_SPRG1,r11; \ - mtspr SPRN_SPRG4W,r1; \ + mtspr SPRN_SPRG_WSCRATCH0,r10;/* save two registers to work with */\ + mtspr SPRN_SPRG_WSCRATCH1,r11; \ + mtspr SPRN_SPRG_WSCRATCH2,r1; \ mfcr r10; /* save CR in r10 for now */\ mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ andi. r11,r11,MSR_PR; \ beq 1f; \ - mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ + mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ ALLOC_STACK_FRAME(r1, THREAD_SIZE); \ 1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ @@ -35,13 +35,13 @@ stw r10,_CCR(r11); /* save various registers */\ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG0; \ + mfspr r10,SPRN_SPRG_RSCRATCH0; \ stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG1; \ + mfspr r12,SPRN_SPRG_RSCRATCH1; \ stw r12,GPR11(r11); \ mflr r10; \ stw r10,_LINK(r11); \ - mfspr r10,SPRN_SPRG4R; \ + mfspr r10,SPRN_SPRG_RSCRATCH2; \ mfspr r12,SPRN_SRR0; \ stw r10,GPR1(r11); \ mfspr r9,SPRN_SRR1; \ @@ -69,21 +69,11 @@ * providing configurations that micro-optimize space usage. */ -/* CRIT_SPRG only used in critical exception handling */ -#define CRIT_SPRG SPRN_SPRG2 -/* MCHECK_SPRG only used in machine check exception handling */ -#define MCHECK_SPRG SPRN_SPRG6W - -#define MCHECK_STACK_BASE mcheckirq_ctx +#define MC_STACK_BASE mcheckirq_ctx #define CRIT_STACK_BASE critirq_ctx /* only on e500mc/e200 */ -#define DEBUG_STACK_BASE dbgirq_ctx -#ifdef CONFIG_E200 -#define DEBUG_SPRG SPRN_SPRG6W -#else -#define DEBUG_SPRG SPRN_SPRG9 -#endif +#define DBG_STACK_BASE dbgirq_ctx #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) @@ -110,7 +100,7 @@ * critical/machine check exception stack at low physical addresses. */ #define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ - mtspr exc_level##_SPRG,r8; \ + mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ mfcr r9; /* save CR in r9 for now */\ @@ -119,7 +109,7 @@ stw r9,_CCR(r8); /* save CR on stack */\ mfspr r10,exc_level_srr1; /* check whether user or kernel */\ andi. 
r10,r10,MSR_PR; \ - mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ + mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ beq 1f; \ @@ -140,7 +130,7 @@ lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \ stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \ mr r11,r8; \ -2: mfspr r8,exc_level##_SPRG; \ +2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \ stw r12,GPR12(r11); /* save various registers */\ mflr r10; \ stw r10,_LINK(r11); \ @@ -161,9 +151,9 @@ #define CRITICAL_EXCEPTION_PROLOG \ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) #define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) /* * Exception vectors. @@ -282,13 +272,13 @@ label: mtspr SPRN_DSRR1,r9; \ lwz r9,GPR9(r11); \ lwz r12,GPR12(r11); \ - mtspr DEBUG_SPRG,r8; \ - BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \ + mtspr SPRN_SPRG_WSCRATCH_DBG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(DBG); /* r8 points to the debug stack */ \ lwz r10,GPR10(r8); \ lwz r11,GPR11(r8); \ - mfspr r8,DEBUG_SPRG; \ + mfspr r8,SPRN_SPRG_RSCRATCH_DBG; \ \ - PPC_RFDI; \ + PPC_RFDI; \ b .; \ \ /* continue normal handling for a debug exception... */ \ @@ -335,11 +325,11 @@ label: mtspr SPRN_CSRR1,r9; \ lwz r9,GPR9(r11); \ lwz r12,GPR12(r11); \ - mtspr CRIT_SPRG,r8; \ + mtspr SPRN_SPRG_WSCRATCH_CRIT,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */ \ lwz r10,GPR10(r8); \ lwz r11,GPR11(r8); \ - mfspr r8,CRIT_SPRG; \ + mfspr r8,SPRN_SPRG_RSCRATCH_CRIT; \ \ rfci; \ b .; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 5bdcc06d294c..eca80482ae72 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -361,7 +361,7 @@ skpinv: addi r6,r6,1 /* Increment */ /* ptr to current thread */ addi r4,r2,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 /* stack */ lis r1,init_thread_union@h @@ -532,12 +532,12 @@ interrupt_base: /* Data TLB Error Interrupt */ START_EXCEPTION(DataTLBError) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 - mtspr SPRN_SPRG4W, r12 - mtspr SPRN_SPRG5W, r13 + mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1, r11 + mtspr SPRN_SPRG_WSCRATCH2, r12 + mtspr SPRN_SPRG_WSCRATCH3, r13 mfcr r11 - mtspr SPRN_SPRG7W, r11 + mtspr SPRN_SPRG_WSCRATCH4, r11 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -557,7 +557,7 @@ interrupt_base: /* Get the PGD for the current thread */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) 4: @@ -598,12 +598,12 @@ interrupt_base: /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. 
*/ - mfspr r11, SPRN_SPRG7R + mfspr r11, SPRN_SPRG_RSCRATCH4 mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 b DataStorage /* Instruction TLB Error Interrupt */ @@ -613,12 +613,12 @@ interrupt_base: * to a different point. */ START_EXCEPTION(InstructionTLBError) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 - mtspr SPRN_SPRG4W, r12 - mtspr SPRN_SPRG5W, r13 + mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1, r11 + mtspr SPRN_SPRG_WSCRATCH2, r12 + mtspr SPRN_SPRG_WSCRATCH3, r13 mfcr r11 - mtspr SPRN_SPRG7W, r11 + mtspr SPRN_SPRG_WSCRATCH4, r11 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -638,7 +638,7 @@ interrupt_base: /* Get the PGD for the current thread */ 3: - mfspr r11,SPRN_SPRG3 + mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) 4: @@ -666,12 +666,12 @@ interrupt_base: /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r11, SPRN_SPRG7R + mfspr r11, SPRN_SPRG_RSCRATCH4 mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage #ifdef CONFIG_SPE @@ -790,12 +790,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) tlbwe /* Done...restore registers and get out of here. */ - mfspr r11, SPRN_SPRG7R + mfspr r11, SPRN_SPRG_RSCRATCH4 mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 rfi /* Force context change */ #ifdef CONFIG_SPE @@ -839,7 +839,7 @@ load_up_spe: #endif /* !CONFIG_SMP */ /* enable use of SPE after return */ oris r9,r9,MSR_SPE@h - mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ li r4,1 li r10,THREAD_ACC stw r4,THREAD_USED_SPE(r5) @@ -1118,7 +1118,7 @@ __secondary_start: /* ptr to current thread */ addi r4,r2,THREAD /* address of our thread_struct */ - mtspr SPRN_SPRG3,r4 + mtspr SPRN_SPRG_THREAD,r4 /* Setup the defaults for TLB entries */ li r4,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1f6816003ebe..91b89b8d63d8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -142,11 +142,11 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -/* Put the paca pointer into r13 and SPRG3 */ +/* Put the paca pointer into r13 and SPRG_PACA */ void __init setup_paca(int cpu) { local_paca = &paca[cpu]; - mtspr(SPRN_SPRG3, local_paca); + mtspr(SPRN_SPRG_PACA, local_paca); } /* diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ea4d64644d02..67b6916f0e94 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -65,7 +65,7 @@ _GLOBAL(load_up_altivec) 1: /* enable use of VMX after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ oris r9,r9,MSR_VEC@h #else ld 
r4,PACACURRENT(r13) -- cgit From c5a8c0c99f67ae8a784faafbaaea1529825796e2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 16 Jul 2009 19:36:57 +0000 Subject: powerpc: Remove use of a second scratch SPRG in STAB code The STAB code used on Power3 and RS/64 uses a second scratch SPRG to save a GPR in order to decide whether to go to do_stab_bolted_* or to handle a normal data access exception. This prevents our scheme of freeing SPRG3 which is user visible for user uses since we cannot use SPRG0 which, on RS/64, seems to be read-only for supervisor mode (like POWER4). This reworks the STAB exception entry to use the PACA as temporary storage instead. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 38 ++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4e9640cc0563..50f2ad36ed09 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -50,18 +50,28 @@ data_access_pSeries: HMT_MEDIUM mtspr SPRN_SPRG_SCRATCH0,r13 BEGIN_FTR_SECTION - mtspr SPRN_SPRG_SCRATCH1,r12 - mfspr r13,SPRN_DAR - mfspr r12,SPRN_DSISR - srdi r13,r13,60 - rlwimi r13,r12,16,0x20 - mfcr r12 - cmpwi r13,0x2c + mfspr r13,SPRN_SPRG_PACA + std r9,PACA_EXSLB+EX_R9(r13) + std r10,PACA_EXSLB+EX_R10(r13) + mfspr r10,SPRN_DAR + mfspr r9,SPRN_DSISR + srdi r10,r10,60 + rlwimi r10,r9,16,0x20 + mfcr r9 + cmpwi r10,0x2c beq do_stab_bolted_pSeries - mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG_SCRATCH1 -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + ld r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + ld r11,PACA_EXSLB+EX_R9(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r12,SPRN_SPRG_SCRATCH0 + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R13(r13) + EXCEPTION_PROLOG_PSERIES_1(data_access_common) +FTR_SECTION_ELSE EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) . = 0x380 .globl data_access_slb_pSeries @@ -224,9 +234,11 @@ masked_interrupt: .align 7 do_stab_bolted_pSeries: - mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG_SCRATCH1 - EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG_SCRATCH0 + std r10,PACA_EXSLB+EX_R13(r13) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted) #ifdef CONFIG_PPC_PSERIES /* -- cgit From dd90bbd5fb763ab8924135a30956030c7a7b94fc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Jul 2009 11:54:32 +1000 Subject: powerpc: Add compat_sys_truncate The truncate syscall has a signed long parameter, so when using a 32- bit userspace with a 64-bit kernel the argument is zero-extended instead of sign-extended. Adding the compat_sys_truncate function fixes the issue. This was noticed during an LSB truncate test failure. The test was checking for the correct error number set when truncate is called with a length of -1. The test can be found at: http://bzr.linuxfoundation.org/lsb/devel/runtime-test?cmd=inventory;rev=stewb%40linux-foundation.org-20090626205411-sfb23cc0tjj7jzgm;path=modules/vsx-pcts/tset/POSIX.os/files/truncate/ BenH: Added compat_sys_ftruncate() as well, same issue. 
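A small user-space style demonstration of the extension problem the wrapper fixes, assuming a 64-bit long (the values are only an example):

	#include <stdio.h>

	int main(void)
	{
		unsigned int reg = 0xffffffffu;	/* 32-bit task passed length = -1 */
		long zero_extended = reg;	/* 4294967295: looks like a huge "valid" length */
		long sign_extended = (int)reg;	/* -1: what the wrapper's (int) cast recovers */

		printf("zero-extended %ld, sign-extended %ld\n", zero_extended, sign_extended);
		return 0;
	}

The compat wrappers below apply exactly that (int) cast before handing the length on to sys_truncate() and sys_ftruncate().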
Signed-off-by: Chase Douglas Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sys_ppc32.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index bb1cfcfdbbbb..1cc5e9e5da96 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -343,6 +343,18 @@ off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) return sys_lseek(fd, (int)offset, origin); } +long compat_sys_truncate(const char __user * path, u32 length) +{ + /* sign extend length */ + return sys_truncate(path, (int)length); +} + +long compat_sys_ftruncate(int fd, u32 length) +{ + /* sign extend length */ + return sys_ftruncate(fd, (int)length); +} + /* Note: it is necessary to treat bufsiz as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) -- cgit From 6c1719942e19936044c4673b18afa26e45a02320 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:07 +0000 Subject: powerpc/of: Remove useless register save/restore when calling OF back enter_prom() used to save and restore registers such as CTR, XER etc.. which are volatile, or SRR0,1... which we don't care about. This removes a bunch of useless code and while at it turns an mtmsrd into an MTMSRD macro which will be useful to Book3E. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_64.S | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index dbf0e3115611..1cb0f3d1714b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -823,30 +823,17 @@ _GLOBAL(enter_prom) * of all registers that it saves. We therefore save those registers * PROM might touch to the stack. (r0, r3-r13 are caller saved) */ - SAVE_8GPRS(2, r1) + SAVE_GPR(2, r1) SAVE_GPR(13, r1) SAVE_8GPRS(14, r1) SAVE_10GPRS(22, r1) - mfcr r4 - std r4,_CCR(r1) - mfctr r5 - std r5,_CTR(r1) - mfspr r6,SPRN_XER - std r6,_XER(r1) - mfdar r7 - std r7,_DAR(r1) - mfdsisr r8 - std r8,_DSISR(r1) - mfsrr0 r9 - std r9,_SRR0(r1) - mfsrr1 r10 - std r10,_SRR1(r1) + mfcr r10 mfmsr r11 + std r10,_CCR(r1) std r11,_MSR(r1) /* Get the PROM entrypoint */ - ld r0,GPR4(r1) - mtlr r0 + mtlr r4 /* Switch MSR to 32 bits mode */ @@ -860,8 +847,7 @@ _GLOBAL(enter_prom) mtmsrd r11 isync - /* Restore arguments & enter PROM here... */ - ld r3,GPR3(r1) + /* Enter PROM here... 
*/ blrl /* Just make sure that r1 top 32 bits didn't get @@ -871,7 +857,7 @@ _GLOBAL(enter_prom) /* Restore the MSR (back to 64 bits) */ ld r0,_MSR(r1) - mtmsrd r0 + MTMSRD(r0) isync /* Restore other registers */ @@ -881,18 +867,6 @@ _GLOBAL(enter_prom) REST_10GPRS(22, r1) ld r4,_CCR(r1) mtcr r4 - ld r5,_CTR(r1) - mtctr r5 - ld r6,_XER(r1) - mtspr SPRN_XER,r6 - ld r7,_DAR(r1) - mtdar r7 - ld r8,_DSISR(r1) - mtdsisr r8 - ld r9,_SRR0(r1) - mtsrr0 r9 - ld r10,_SRR1(r1) - mtsrr1 r10 addi r1,r1,PROM_FRAME_SIZE ld r0,16(r1) -- cgit From 6f0ef0f505af1ce6e9756087a9d4cc3778bae8c6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:26 +0000 Subject: powerpc/mm: Call mmu_context_init() from ppc64 Our 64-bit hash context handling has no init function, but 64-bit Book3E will use the common mmu_context_nohash.c code which does, so define an empty inline mmu_context_init() for 64-bit server and call it from our 64-bit setup_arch() Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala --- arch/powerpc/kernel/setup_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 91b89b8d63d8..325dc5b2e626 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -534,6 +534,10 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); + + /* Initialize the MMU context management stuff */ + mmu_context_init(); + ppc64_boot_msg(0x15, "Setup Done"); } -- cgit From 747bea91b764aefd59091ebff80f182282f1d23c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:27 +0000 Subject: powerpc: Clean ifdef usage in copy_thread() Currently, a single ifdef covers SLB related bits and more generic ppc64 related bits, split this in two separate ifdef's since 64-bit BookE will need one but not the other. Signed-off-by: Benjamin Herrenschmidt Acked-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 892a9f2e6d76..678ff132e8b0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -664,6 +664,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, sp_vsid |= SLB_VSID_KERNEL | llp; p->thread.ksp_vsid = sp_vsid; } +#endif /* CONFIG_PPC_STD_MMU_64 */ /* * The PPC64 ABI makes use of a TOC to contain function @@ -671,6 +672,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, * to the TOC entry. The first entry is a pointer to the actual * function. */ +#ifdef CONFIG_PPC64 kregs->nip = *((unsigned long *)ret_from_fork); #else kregs->nip = (unsigned long)ret_from_fork; -- cgit From cf54dc7cd4f9aab55cd3e1794b0b74c3c88cd1a0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:28 +0000 Subject: powerpc: Move definitions of secondary CPU spinloop to header file Those definitions are currently declared extern in the .c file where they are used, move them to a header file instead. 
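Since this log is limited to arch/powerpc/kernel, the diff below only shows the extern declarations disappearing from the two .c files; the header side of the move is presumably just the same externs gathered in one place, along the lines of:

	/* Sketch of the header side of the move; the exact header is not shown in this log */
	extern char __secondary_hold;
	extern unsigned long __secondary_hold_spinloop;
	extern unsigned long __secondary_hold_acknowledge;
	extern void generic_secondary_smp_init(void);
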
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 4 ---- arch/powerpc/kernel/setup_64.c | 3 --- 2 files changed, 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a538824616fd..d942404779c1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1259,10 +1259,6 @@ static void __init prom_initialize_tce_table(void) * * -- Cort */ -extern char __secondary_hold; -extern unsigned long __secondary_hold_spinloop; -extern unsigned long __secondary_hold_acknowledge; - /* * We want to reference the copy of __secondary_hold_* in the * 0 - 0x100 address range diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 325dc5b2e626..a6b6c4c9ae41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -230,9 +230,6 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) -extern unsigned long __secondary_hold_spinloop; -extern void generic_secondary_smp_init(void); - void smp_release_cpus(void) { unsigned long *ptr; -- cgit From 57e2a99f74b0d3720c97a6aadb57ae6aad3c61ea Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Jul 2009 11:59:34 +1000 Subject: powerpc: Add memory management headers for new 64-bit BookE This adds the PTE and pgtable format definitions, along with changes to the kernel memory map and other definitions related to implementing support for 64-bit Book3E. This also shields some asm-offset bits that are currently only relevant on 32-bit We also move the definition of the "linux" page size constants to the common mmu.h file and add a few sizes that are relevant to embedded processors. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 561b64652311..0a9f30b54952 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -52,9 +52,11 @@ #include #endif +#ifdef CONFIG_PPC32 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) #include "head_booke.h" #endif +#endif #if defined(CONFIG_FSL_BOOKE) #include "../mm/mmu_decl.h" @@ -260,6 +262,7 @@ int main(void) DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); @@ -278,7 +281,7 @@ int main(void) DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif - +#endif DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); -- cgit From dce6670aaa7efece0558010b48d5ef9d421154be Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:42 +0000 Subject: powerpc: Add PACA fields specific to 64-bit Book3E processors This adds various fields in the PACA that are for use specifically by Book3E processors, such as exception save areas, current pgd pointer, special exceptions kernel stacks etc... 
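The asm-offsets.c hunk below only exports the new field offsets to assembly. As a rough sketch, with field names taken from those offsets but types and array sizes illustrative rather than confirmed by this excerpt, the corresponding struct paca_struct additions look like:

	#ifdef CONFIG_PPC_BOOK3E
		pgd_t *pgd;		/* current PGD */
		pgd_t *kernel_pgd;	/* kernel PGD, set to swapper_pg_dir in paca.c below */
		u64 exgen[8];		/* exception save areas */
		u64 extlb[8];
		u64 exmc[8];
		u64 excrit[8];
		u64 exdbg[8];
		void *mc_kstack;	/* kernel stacks for machine check, */
		void *crit_kstack;	/* critical and debug exceptions */
		void *dbg_kstack;
	#endif /* CONFIG_PPC_BOOK3E */
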
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 14 ++++++++++++++ arch/powerpc/kernel/paca.c | 3 +++ 2 files changed, 17 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0a9f30b54952..b9e010d0fc91 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -140,6 +140,20 @@ int main(void) context.high_slices_psize)); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ + +#ifdef CONFIG_PPC_BOOK3E + DEFINE(PACAPGD, offsetof(struct paca_struct, pgd)); + DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd)); + DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); + DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb)); + DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); + DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit)); + DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg)); + DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); + DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); + DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); +#endif /* CONFIG_PPC_BOOK3E */ + #ifdef CONFIG_PPC_STD_MMU_64 DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index e9962c7f8a09..d16b1ea55d44 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -13,6 +13,7 @@ #include #include #include +#include /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -87,6 +88,8 @@ void __init initialise_pacas(void) #ifdef CONFIG_PPC_BOOK3S new_paca->lppaca_ptr = &lppaca[cpu]; +#else + new_paca->kernel_pgd = swapper_pg_dir; #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; -- cgit From 25d21ad6e799cccd097b9df2a2fefe19a7e1dfcf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:47 +0000 Subject: powerpc: Add TLB management code for 64-bit Book3E This adds the TLB miss handler assembly, the low level TLB flush routines along with the necessary hook for dealing with our virtual page tables or indirect TLB entries that need to be flushes when PTE pages are freed. There is currently no support for hugetlbfs Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a6b6c4c9ae41..65aced7b833a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "setup.h" @@ -147,6 +148,9 @@ void __init setup_paca(int cpu) { local_paca = &paca[cpu]; mtspr(SPRN_SPRG_PACA, local_paca); +#ifdef CONFIG_PPC_BOOK3E + mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#endif } /* -- cgit From 2d27cfd3286966c04d4192a9db5a6c7ea60eebf1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 23 Jul 2009 23:15:59 +0000 Subject: powerpc: Remaining 64-bit Book3E support This contains all the bits that didn't fit in previous patches :-) This includes the actual exception handlers assembly, the changes to the kernel entry, other misc bits and wiring it all up in Kconfig. 
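The Kconfig side of that wiring is outside the arch/powerpc/kernel diffstat shown here. Roughly, it adds a 64-bit processor type choice along these lines (a sketch only: the prompt strings are illustrative, while the CONFIG_PPC_BOOK3S_64 and CONFIG_PPC_BOOK3E_64 symbols are the ones the Makefile and #ifdef changes below key off):

	choice
		prompt "Processor Type"
		depends on PPC64
	config PPC_BOOK3S_64
		bool "Server processors"
	config PPC_BOOK3E_64
		bool "Embedded processors"
	endchoice
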
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 10 +- arch/powerpc/kernel/cputable.c | 27 +- arch/powerpc/kernel/entry_64.S | 60 ++- arch/powerpc/kernel/exceptions-64e.S | 784 +++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/head_64.S | 68 ++- arch/powerpc/kernel/setup_64.c | 19 + 6 files changed, 951 insertions(+), 17 deletions(-) create mode 100644 arch/powerpc/kernel/exceptions-64e.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b73396b93905..035946f9d5fb 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -33,10 +33,10 @@ obj-y := cputable.o ptrace.o syscalls.o \ obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ - paca.o cpu_setup_ppc970.o \ - cpu_setup_pa6t.o \ - firmware.o nvram_64.o + paca.o nvram_64.o firmware.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o +obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o @@ -63,8 +63,8 @@ obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o -extra-$(CONFIG_PPC_STD_MMU) := head_32.o -extra-$(CONFIG_PPC64) := head_64.o +extra-y := head_$(CONFIG_WORD_SIZE).o +extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 4a24a2fc4574..f34ea37079b5 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -93,7 +93,7 @@ extern void __restore_cpu_power7(void); PPC_FEATURE_BOOKE) static struct cpu_spec __initdata cpu_specs[] = { -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 { /* Power3 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00400000, @@ -508,7 +508,30 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "power4", } -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_BOOK3E_64 + { /* This is a default entry to get going, to be replaced by + * a real one at some stage + */ +#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ + CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "Book3E", + .cpu_features = CPU_FTRS_BASE_BOOK3E, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | + MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .machine_check = machine_check_generic, + .platform = "power6", + }, +#endif + #ifdef CONFIG_PPC32 #if CLASSIC_PPC { /* 601 */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 1cb0f3d1714b..66bcda34a6bb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -120,9 +120,15 @@ BEGIN_FW_FTR_SECTION 2: END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) #endif /* CONFIG_PPC_ISERIES */ + + /* Hard enable interrupts */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 1 +#else mfmsr r11 ori r11,r11,MSR_EE mtmsrd r11,1 +#endif /* CONFIG_PPC_BOOK3E */ #ifdef SHOW_SYSCALLS bl .do_show_syscall @@ -168,15 +174,25 @@ syscall_exit: #endif clrrdi r12,r1,THREAD_SHIFT - 
/* disable interrupts so current_thread_info()->flags can't change, - and so that we don't get interrupted after loading SRR0/1. */ ld r8,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S + /* No MSR:RI on BookE */ andi. r10,r8,MSR_RI beq- unrecov_restore +#endif + + /* Disable interrupts so current_thread_info()->flags can't change, + * and so that we don't get interrupted after loading SRR0/1. + */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 0 +#else mfmsr r10 rldicl r10,r10,48,1 rotldi r10,r10,16 mtmsrd r10,1 +#endif /* CONFIG_PPC_BOOK3E */ + ld r9,TI_FLAGS(r12) li r11,-_LAST_ERRNO andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) @@ -194,9 +210,13 @@ syscall_error_cont: * userspace and we take an exception after restoring r13, * we end up corrupting the userspace r13 value. */ +#ifdef CONFIG_PPC_BOOK3S + /* No MSR:RI on BookE */ li r12,MSR_RI andc r11,r10,r12 mtmsrd r11,1 /* clear MSR.RI */ +#endif /* CONFIG_PPC_BOOK3S */ + beq- 1f ACCOUNT_CPU_USER_EXIT(r11, r12) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ @@ -206,7 +226,7 @@ syscall_error_cont: mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - rfid + RFI b . /* prevent speculative execution */ syscall_error: @@ -276,9 +296,13 @@ syscall_exit_work: beq .ret_from_except_lite /* Re-enable interrupts */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 1 +#else mfmsr r10 ori r10,r10,MSR_EE mtmsrd r10,1 +#endif /* CONFIG_PPC_BOOK3E */ bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD @@ -380,7 +404,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) and. r0,r0,r22 beq+ 1f andc r22,r22,r0 - mtmsrd r22 + MTMSRD(r22) isync 1: std r20,_NIP(r1) mfcr r23 @@ -399,6 +423,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) std r6,PACACURRENT(r13) /* Set new 'current' */ ld r8,KSP(r4) /* new stack pointer */ +#ifdef CONFIG_PPC_BOOK3S BEGIN_FTR_SECTION BEGIN_FTR_SECTION_NESTED(95) clrrdi r6,r8,28 /* get its ESID */ @@ -445,8 +470,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) slbie r6 /* Workaround POWER5 < DD2.1 issue */ slbmte r7,r0 isync - 2: +#endif /* !CONFIG_PPC_BOOK3S */ + clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE because we don't need to leave the 288-byte ABI gap at the @@ -490,10 +516,14 @@ _GLOBAL(ret_from_except_lite) * can't change between when we test it and when we return * from the interrupt. */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 0 +#else mfmsr r10 /* Get current interrupt state */ rldicl r9,r10,48,1 /* clear MSR_EE */ rotldi r9,r9,16 mtmsrd r9,1 /* Update machine state */ +#endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PREEMPT clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ @@ -540,6 +570,9 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ stb r4,PACAHARDIRQEN(r13) +#ifdef CONFIG_PPC_BOOK3E + b .exception_return_book3e +#else ld r4,_CTR(r1) ld r0,_LINK(r1) mtctr r4 @@ -588,6 +621,8 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) rfid b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_BOOK3E */ + iseries_check_pending_irqs: #ifdef CONFIG_PPC_ISERIES ld r5,SOFTE(r1) @@ -638,6 +673,11 @@ do_work: li r0,1 stb r0,PACASOFTIRQEN(r13) stb r0,PACAHARDIRQEN(r13) +#ifdef CONFIG_PPC_BOOK3E + wrteei 1 + bl .preempt_schedule + wrteei 0 +#else ori r10,r10,MSR_EE mtmsrd r10,1 /* reenable interrupts */ bl .preempt_schedule @@ -646,6 +686,7 @@ do_work: rldicl r10,r10,48,1 /* disable interrupts again */ rotldi r10,r10,16 mtmsrd r10,1 +#endif /* CONFIG_PPC_BOOK3E */ ld r4,TI_FLAGS(r9) andi. 
r0,r4,_TIF_NEED_RESCHED bne 1b @@ -654,8 +695,12 @@ do_work: user_work: #endif /* Enable interrupts */ +#ifdef CONFIG_PPC_BOOK3E + wrteei 1 +#else ori r10,r10,MSR_EE mtmsrd r10,1 +#endif /* CONFIG_PPC_BOOK3E */ andi. r0,r4,_TIF_NEED_RESCHED beq 1f @@ -837,6 +882,10 @@ _GLOBAL(enter_prom) /* Switch MSR to 32 bits mode */ +#ifdef CONFIG_PPC_BOOK3E + rlwinm r11,r11,0,1,31 + mtmsr r11 +#else /* CONFIG_PPC_BOOK3E */ mfmsr r11 li r12,1 rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) @@ -845,6 +894,7 @@ _GLOBAL(enter_prom) rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) andc r11,r11,r12 mtmsrd r11 +#endif /* CONFIG_PPC_BOOK3E */ isync /* Enter PROM here... */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S new file mode 100644 index 000000000000..695d4847d228 --- /dev/null +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -0,0 +1,784 @@ +/* + * Boot code and exception vectors for Book3E processors + * + * Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* XXX This will ultimately add space for a special exception save + * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... + * when taking special interrupts. For now we don't support that, + * special interrupts from within a non-standard level will probably + * blow you up + */ +#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE + +/* Exception prolog code for all exceptions */ +#define EXCEPTION_PROLOG(n, type, addition) \ + mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \ + mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \ + std r10,PACA_EX##type+EX_R10(r13); \ + std r11,PACA_EX##type+EX_R11(r13); \ + mfcr r10; /* save CR */ \ + addition; /* additional code for that exc. */ \ + std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \ + stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \ + mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ + type##_SET_KSTACK; /* get special stack if necessary */\ + andi. 
r10,r11,MSR_PR; /* save stack pointer */ \ + beq 1f; /* branch around if supervisor */ \ + ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\ +1: cmpdi cr1,r1,0; /* check if SP makes sense */ \ + bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \ + mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */ + +/* Exception type-specific macros */ +#define GEN_SET_KSTACK \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ +#define SPRN_GEN_SRR0 SPRN_SRR0 +#define SPRN_GEN_SRR1 SPRN_SRR1 + +#define CRIT_SET_KSTACK \ + ld r1,PACA_CRIT_STACK(r13); \ + subi r1,r1,SPECIAL_EXC_FRAME_SIZE; +#define SPRN_CRIT_SRR0 SPRN_CSRR0 +#define SPRN_CRIT_SRR1 SPRN_CSRR1 + +#define DBG_SET_KSTACK \ + ld r1,PACA_DBG_STACK(r13); \ + subi r1,r1,SPECIAL_EXC_FRAME_SIZE; +#define SPRN_DBG_SRR0 SPRN_DSRR0 +#define SPRN_DBG_SRR1 SPRN_DSRR1 + +#define MC_SET_KSTACK \ + ld r1,PACA_MC_STACK(r13); \ + subi r1,r1,SPECIAL_EXC_FRAME_SIZE; +#define SPRN_MC_SRR0 SPRN_MCSRR0 +#define SPRN_MC_SRR1 SPRN_MCSRR1 + +#define NORMAL_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, GEN, addition##_GEN) + +#define CRIT_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, CRIT, addition##_CRIT) + +#define DBG_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, DBG, addition##_DBG) + +#define MC_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, MC, addition##_MC) + + +/* Variants of the "addition" argument for the prolog + */ +#define PROLOG_ADDITION_NONE_GEN +#define PROLOG_ADDITION_NONE_CRIT +#define PROLOG_ADDITION_NONE_DBG +#define PROLOG_ADDITION_NONE_MC + +#define PROLOG_ADDITION_MASKABLE_GEN \ + lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ + cmpwi cr0,r11,0; /* yes -> go out of line */ \ + beq masked_interrupt_book3e; + +#define PROLOG_ADDITION_2REGS_GEN \ + std r14,PACA_EXGEN+EX_R14(r13); \ + std r15,PACA_EXGEN+EX_R15(r13) + +#define PROLOG_ADDITION_1REG_GEN \ + std r14,PACA_EXGEN+EX_R14(r13); + +#define PROLOG_ADDITION_2REGS_CRIT \ + std r14,PACA_EXCRIT+EX_R14(r13); \ + std r15,PACA_EXCRIT+EX_R15(r13) + +#define PROLOG_ADDITION_2REGS_DBG \ + std r14,PACA_EXDBG+EX_R14(r13); \ + std r15,PACA_EXDBG+EX_R15(r13) + +#define PROLOG_ADDITION_2REGS_MC \ + std r14,PACA_EXMC+EX_R14(r13); \ + std r15,PACA_EXMC+EX_R15(r13) + +/* Core exception code for all exceptions except TLB misses. 
+ * XXX: Needs to make SPRN_SPRG_GEN depend on exception type + */ +#define EXCEPTION_COMMON(n, excf, ints) \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + std r9,GPR9(r1); /* save r9 in stackframe */ \ + std r10,_NIP(r1); /* save SRR0 to stackframe */ \ + std r11,_MSR(r1); /* save SRR1 to stackframe */ \ + ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ + ld r3,excf+EX_R10(r13); /* get back r10 */ \ + ld r4,excf+EX_R11(r13); /* get back r11 */ \ + mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \ + std r12,GPR12(r1); /* save r12 in stackframe */ \ + ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + mflr r6; /* save LR in stackframe */ \ + mfctr r7; /* save CTR in stackframe */ \ + mfspr r8,SPRN_XER; /* save XER in stackframe */ \ + ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ + lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ + lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \ + ld r12,exception_marker@toc(r2); \ + li r0,0; \ + std r3,GPR10(r1); /* save r10 to stackframe */ \ + std r4,GPR11(r1); /* save r11 to stackframe */ \ + std r5,GPR13(r1); /* save it to stackframe */ \ + std r6,_LINK(r1); \ + std r7,_CTR(r1); \ + std r8,_XER(r1); \ + li r3,(n)+1; /* indicate partial regs in trap */ \ + std r9,0(r1); /* store stack frame back link */ \ + std r10,_CCR(r1); /* store orig CR in stackframe */ \ + std r9,GPR1(r1); /* store stack frame back link */ \ + std r11,SOFTE(r1); /* and save it to stackframe */ \ + std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + std r3,_TRAP(r1); /* set trap number */ \ + std r0,RESULT(r1); /* clear regs->result */ \ + ints; + +/* Variants for the "ints" argument */ +#define INTS_KEEP +#define INTS_DISABLE_SOFT \ + stb r0,PACASOFTIRQEN(r13); /* mark interrupts soft-disabled */ \ + TRACE_DISABLE_INTS; +#define INTS_DISABLE_HARD \ + stb r0,PACAHARDIRQEN(r13); /* and hard disabled */ +#define INTS_DISABLE_ALL \ + INTS_DISABLE_SOFT \ + INTS_DISABLE_HARD + +/* This is called by exceptions that used INTS_KEEP (that is did not clear + * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE + * to it's previous value + * + * XXX In the long run, we may want to open-code it in order to separate the + * load from the wrtee, thus limiting the latency caused by the dependency + * but at this point, I'll favor code clarity until we have a near to final + * implementation + */ +#define INTS_RESTORE_HARD \ + ld r11,_MSR(r1); \ + wrtee r11; + +/* XXX FIXME: Restore r14/r15 when necessary */ +#define BAD_STACK_TRAMPOLINE(n) \ +exc_##n##_bad_stack: \ + li r1,(n); /* get exception number */ \ + sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \ + b bad_stack_book3e; /* bad stack error */ + +#define EXCEPTION_STUB(loc, label) \ + . = interrupt_base_book3e + loc; \ + nop; /* To make debug interrupts happy */ \ + b exc_##label##_book3e; + +#define ACK_NONE(r) +#define ACK_DEC(r) \ + lis r,TSR_DIS@h; \ + mtspr SPRN_TSR,r +#define ACK_FIT(r) \ + lis r,TSR_FIS@h; \ + mtspr SPRN_TSR,r + +#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ + START_EXCEPTION(label); \ + NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ + EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \ + ack(r8); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except_lite; + +/* This value is used to mark exception frames on the stack. 
*/ + .section ".toc","aw" +exception_marker: + .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER + + +/* + * And here we have the exception vectors ! + */ + + .text + .balign 0x1000 + .globl interrupt_base_book3e +interrupt_base_book3e: /* fake trap */ + /* Note: If real debug exceptions are supported by the HW, the vector + * below will have to be patched up to point to an appropriate handler + */ + EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ + EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ + EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ + EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */ + EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */ + EXCEPTION_STUB(0x0a0, external_input) /* 0x0500 */ + EXCEPTION_STUB(0x0c0, alignment) /* 0x0600 */ + EXCEPTION_STUB(0x0e0, program) /* 0x0700 */ + EXCEPTION_STUB(0x100, fp_unavailable) /* 0x0800 */ + EXCEPTION_STUB(0x120, system_call) /* 0x0c00 */ + EXCEPTION_STUB(0x140, ap_unavailable) /* 0x0f20 */ + EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */ + EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */ + EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ + EXCEPTION_STUB(0x1c0, data_tlb_miss) + EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + +#if 0 + EXCEPTION_STUB(0x280, processor_doorbell) + EXCEPTION_STUB(0x220, processor_doorbell_crit) +#endif + .globl interrupt_end_book3e +interrupt_end_book3e: + +/* Critical Input Interrupt */ + START_EXCEPTION(critical_input); + CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL) +// bl special_reg_save_crit +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .critical_exception +// b ret_from_crit_except + b . + +/* Machine Check Interrupt */ + START_EXCEPTION(machine_check); + CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL) +// bl special_reg_save_mc +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .machine_check_exception +// b ret_from_mc_except + b . + +/* Data Storage Interrupt */ + START_EXCEPTION(data_storage) + NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) + mfspr r14,SPRN_DEAR + mfspr r15,SPRN_ESR + EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP) + b storage_fault_common + +/* Instruction Storage Interrupt */ + START_EXCEPTION(instruction_storage); + NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) + li r15,0 + mr r14,r10 + EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP) + b storage_fault_common + +/* External Input Interrupt */ + MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE) + +/* Alignment */ + START_EXCEPTION(alignment); + NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS) + mfspr r14,SPRN_DEAR + mfspr r15,SPRN_ESR + EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP) + b alignment_more /* no room, go out of line */ + +/* Program Interrupt */ + START_EXCEPTION(program); + NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) + mfspr r14,SPRN_ESR + EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + ld r14,PACA_EXGEN+EX_R14(r13) + bl .save_nvgprs + INTS_RESTORE_HARD + bl .program_check_exception + b .ret_from_except + +/* Floating Point Unavailable Interrupt */ + START_EXCEPTION(fp_unavailable); + NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE) + /* we can probably do a shorter exception entry for that one... 
*/ + EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) + bne 1f /* if from user, just load it up */ + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + INTS_RESTORE_HARD + bl .kernel_fp_unavailable_exception + BUG_OPCODE +1: ld r12,_MSR(r1) + bl .load_up_fpu + b fast_exception_return + +/* Decrementer Interrupt */ + MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC) + +/* Fixed Interval Timer Interrupt */ + MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT) + +/* Watchdog Timer Interrupt */ + START_EXCEPTION(watchdog); + CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL) +// bl special_reg_save_crit +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .unknown_exception +// b ret_from_crit_except + b . + +/* System Call Interrupt */ + START_EXCEPTION(system_call) + mr r9,r13 /* keep a copy of userland r13 */ + mfspr r11,SPRN_SRR0 /* get return address */ + mfspr r12,SPRN_SRR1 /* get previous MSR */ + mfspr r13,SPRN_SPRG_PACA /* get our PACA */ + b system_call_common + +/* Auxillary Processor Unavailable Interrupt */ + START_EXCEPTION(ap_unavailable); + NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except + +/* Debug exception as a critical interrupt*/ + START_EXCEPTION(debug_crit); + CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the CSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_CSRR1,r11 + lwz r10,PACA_EXCRIT+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXCRIT+EX_R1(r13) + ld r14,PACA_EXCRIT+EX_R14(r13) + ld r15,PACA_EXCRIT+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXCRIT+EX_R11(r13) + mfspr r13,SPRN_SPRG_CRIT_SCRATCH + rfci + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_CRIT_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXCRIT+EX_R14(r13) + ld r15,PACA_EXCRIT+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + +kernel_dbg_exc: + b . /* NYI */ + + +/* + * An interrupt came in while soft-disabled; clear EE in SRR1, + * clear paca->hard_enabled and return. 
+ */ +masked_interrupt_book3e: + mtcr r10 + stb r11,PACAHARDIRQEN(r13) + mfspr r10,SPRN_SRR1 + rldicl r11,r10,48,1 /* clear MSR_EE */ + rotldi r10,r11,16 + mtspr SPRN_SRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13); /* restore registers */ + ld r11,PACA_EXGEN+EX_R11(r13); + mfspr r13,SPRN_SPRG_GEN_SCRATCH; + rfi + b . + +/* + * This is called from 0x300 and 0x400 handlers after the prologs with + * r14 and r15 containing the fault address and error code, with the + * original values stashed away in the PACA + */ +storage_fault_common: + std r14,_DAR(r1) + std r15,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + mr r5,r15 + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) + INTS_RESTORE_HARD + bl .do_page_fault + cmpdi r3,0 + bne- 1f + b .ret_from_except_lite +1: bl .save_nvgprs + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + ld r4,_DAR(r1) + bl .bad_page_fault + b .ret_from_except + +/* + * Alignment exception doesn't fit entirely in the 0x100 bytes so it + * continues here. + */ +alignment_more: + std r14,_DAR(r1) + std r15,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) + bl .save_nvgprs + INTS_RESTORE_HARD + bl .alignment_exception + b .ret_from_except + +/* + * We branch here from entry_64.S for the last stage of the exception + * return code path. MSR:EE is expected to be off at that point + */ +_GLOBAL(exception_return_book3e) + b 1f + +/* This is the return from load_up_fpu fast path which could do with + * less GPR restores in fact, but for now we have a single return path + */ + .globl fast_exception_return +fast_exception_return: + wrteei 0 +1: mr r0,r13 + ld r10,_MSR(r1) + REST_4GPRS(2, r1) + andi. r6,r10,MSR_PR + REST_2GPRS(6, r1) + beq 1f + ACCOUNT_CPU_USER_EXIT(r10, r11) + ld r0,GPR13(r1) + +1: stdcx. r0,0,r1 /* to clear the reservation */ + + ld r8,_CCR(r1) + ld r9,_LINK(r1) + ld r10,_CTR(r1) + ld r11,_XER(r1) + mtcr r8 + mtlr r9 + mtctr r10 + mtxer r11 + REST_2GPRS(8, r1) + ld r10,GPR10(r1) + ld r11,GPR11(r1) + ld r12,GPR12(r1) + mtspr SPRN_SPRG_GEN_SCRATCH,r0 + + std r10,PACA_EXGEN+EX_R10(r13); + std r11,PACA_EXGEN+EX_R11(r13); + ld r10,_NIP(r1) + ld r11,_MSR(r1) + ld r0,GPR0(r1) + ld r1,GPR1(r1) + mtspr SPRN_SRR0,r10 + mtspr SPRN_SRR1,r11 + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + mfspr r13,SPRN_SPRG_GEN_SCRATCH + rfi + +/* + * Trampolines used when spotting a bad kernel stack pointer in + * the exception entry code. + * + * TODO: move some bits like SRR0 read to trampoline, pass PACA + * index around, etc... 
to handle crit & mcheck + */ +BAD_STACK_TRAMPOLINE(0x000) +BAD_STACK_TRAMPOLINE(0x100) +BAD_STACK_TRAMPOLINE(0x200) +BAD_STACK_TRAMPOLINE(0x300) +BAD_STACK_TRAMPOLINE(0x400) +BAD_STACK_TRAMPOLINE(0x500) +BAD_STACK_TRAMPOLINE(0x600) +BAD_STACK_TRAMPOLINE(0x700) +BAD_STACK_TRAMPOLINE(0x800) +BAD_STACK_TRAMPOLINE(0x900) +BAD_STACK_TRAMPOLINE(0x980) +BAD_STACK_TRAMPOLINE(0x9f0) +BAD_STACK_TRAMPOLINE(0xa00) +BAD_STACK_TRAMPOLINE(0xb00) +BAD_STACK_TRAMPOLINE(0xc00) +BAD_STACK_TRAMPOLINE(0xd00) +BAD_STACK_TRAMPOLINE(0xe00) +BAD_STACK_TRAMPOLINE(0xf00) +BAD_STACK_TRAMPOLINE(0xf20) + + .globl bad_stack_book3e +bad_stack_book3e: + /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ + mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r10,_NIP(r1) + std r11,_MSR(r1) + ld r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */ + lwz r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */ + std r10,GPR1(r1) + std r11,_CCR(r1) + mfspr r10,SPRN_DEAR + mfspr r11,SPRN_ESR + std r10,_DAR(r1) + std r11,_DSISR(r1) + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + std r9,GPR9(r1); /* save r9 in stackframe */ \ + ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ + ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ + mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ + std r3,GPR10(r1); /* save r10 to stackframe */ \ + std r4,GPR11(r1); /* save r11 to stackframe */ \ + std r12,GPR12(r1); /* save r12 in stackframe */ \ + std r5,GPR13(r1); /* save it to stackframe */ \ + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_10GPRS(14,r1) + SAVE_8GPRS(24,r1) + lhz r12,PACA_TRAP_SAVE(r13) + std r12,_TRAP(r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_bad_stack + b 1b + +/* + * Setup the initial TLB for a core. This current implementation + * assume that whatever we are running off will not conflict with + * the new mapping at PAGE_OFFSET. + * We also make various assumptions about the processor we run on, + * this might have to be made more flexible based on the content + * of MMUCFG and friends. + */ +_GLOBAL(initial_tlb_book3e) + + /* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the + * kernel linear mapping. We also set MAS8 once for all here though + * that will have to be made dependent on whether we are running under + * a hypervisor I suppose. 
+ */ + li r3,MAS0_HES | MAS0_WQ_ALLWAYS + mtspr SPRN_MAS0,r3 + lis r3,(MAS1_VALID | MAS1_IPROT)@h + ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT + mtspr SPRN_MAS1,r3 + LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | MAS2_M) + mtspr SPRN_MAS2,r3 + li r3,MAS3_SR | MAS3_SW | MAS3_SX + mtspr SPRN_MAS7_MAS3,r3 + li r3,0 + mtspr SPRN_MAS8,r3 + + /* Write the TLB entry */ + tlbwe + + /* Now we branch the new virtual address mapped by this entry */ + LOAD_REG_IMMEDIATE(r3,1f) + mtctr r3 + bctr + +1: /* We are now running at PAGE_OFFSET, clean the TLB of everything + * else (XXX we should scan for bolted crap from the firmware too) + */ + PPC_TLBILX(0,0,0) + sync + isync + + /* We translate LR and return */ + mflr r3 + tovirt(r3,r3) + mtlr r3 + blr + +/* + * Main entry (boot CPU, thread 0) + * + * We enter here from head_64.S, possibly after the prom_init trampoline + * with r3 and r4 already saved to r31 and 30 respectively and in 64 bits + * mode. Anything else is as it was left by the bootloader + * + * Initial requirements of this port: + * + * - Kernel loaded at 0 physical + * - A good lump of memory mapped 0:0 by UTLB entry 0 + * - MSR:IS & MSR:DS set to 0 + * + * Note that some of the above requirements will be relaxed in the future + * as the kernel becomes smarter at dealing with different initial conditions + * but for now you have to be careful + */ +_GLOBAL(start_initialization_book3e) + mflr r28 + + /* First, we need to setup some initial TLBs to map the kernel + * text, data and bss at PAGE_OFFSET. We don't have a real mode + * and always use AS 0, so we just set it up to match our link + * address and never use 0 based addresses. + */ + bl .initial_tlb_book3e + + /* Init global core bits */ + bl .init_core_book3e + + /* Init per-thread bits */ + bl .init_thread_book3e + + /* Return to common init code */ + tovirt(r28,r28) + mtlr r28 + blr + + +/* + * Secondary core/processor entry + * + * This is entered for thread 0 of a secondary core, all other threads + * are expected to be stopped. It's similar to start_initialization_book3e + * except that it's generally entered from the holding loop in head_64.S + * after CPUs have been gathered by Open Firmware. + * + * We assume we are in 32 bits mode running with whatever TLB entry was + * set for us by the firmware or POR engine. + */ +_GLOBAL(book3e_secondary_core_init_tlb_set) + li r4,1 + b .generic_secondary_smp_init + +_GLOBAL(book3e_secondary_core_init) + mflr r28 + + /* Do we need to setup initial TLB entry ? */ + cmplwi r4,0 + bne 2f + + /* Setup TLB for this core */ + bl .initial_tlb_book3e + + /* We can return from the above running at a different + * address, so recalculate r2 (TOC) + */ + bl .relative_toc + + /* Init global core bits */ +2: bl .init_core_book3e + + /* Init per-thread bits */ +3: bl .init_thread_book3e + + /* Return to common init code at proper virtual address. + * + * Due to various previous assumptions, we know we entered this + * function at either the final PAGE_OFFSET mapping or using a + * 1:1 mapping at 0, so we don't bother doing a complicated check + * here, we just ensure the return address has the right top bits. + * + * Note that if we ever want to be smarter about where we can be + * started from, we have to be careful that by the time we reach + * the code below we may already be running at a different location + * than the one we were called from since initial_tlb_book3e can + * have moved us already. 
+ */ + cmpdi cr0,r28,0 + blt 1f + lis r3,PAGE_OFFSET@highest + sldi r3,r3,32 + or r28,r28,r3 +1: mtlr r28 + blr + +_GLOBAL(book3e_secondary_thread_init) + mflr r28 + b 3b + +_STATIC(init_core_book3e) + /* Establish the interrupt vector base */ + LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e) + mtspr SPRN_IVPR,r3 + sync + blr + +_STATIC(init_thread_book3e) + lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h + mtspr SPRN_EPCR,r3 + + /* Make sure interrupts are off */ + wrteei 0 + + /* disable watchdog and FIT and enable DEC interrupts */ + lis r3,TCR_DIE@h + mtspr SPRN_TCR,r3 + + blr + + + diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0552f01041ab..c38afdb45d7b 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -121,10 +121,11 @@ __run_at_load: */ .globl __secondary_hold __secondary_hold: +#ifndef CONFIG_PPC_BOOK3E mfmsr r24 ori r24,r24,MSR_RI mtmsrd r24 /* RI on */ - +#endif /* Grab our physical cpu number */ mr r24,r3 @@ -143,6 +144,7 @@ __secondary_hold: ld r4,0(r4) /* deref function descriptor */ mtctr r4 mr r3,r24 + li r4,0 bctr #else BUG_OPCODE @@ -163,21 +165,49 @@ exception_marker: #include "exceptions-64s.S" #endif +_GLOBAL(generic_secondary_thread_init) + mr r24,r3 + + /* turn on 64-bit mode */ + bl .enable_64b_mode + + /* get a valid TOC pointer, wherever we're mapped at */ + bl .relative_toc + +#ifdef CONFIG_PPC_BOOK3E + /* Book3E initialization */ + mr r3,r24 + bl .book3e_secondary_thread_init +#endif + b generic_secondary_common_init /* * On pSeries and most other platforms, secondary processors spin * in the following code. * At entry, r3 = this processor's number (physical cpu id) + * + * On Book3E, r4 = 1 to indicate that the initial TLB entry for + * this core already exists (setup via some other mechanism such + * as SCOM before entry). */ _GLOBAL(generic_secondary_smp_init) mr r24,r3 - + mr r25,r4 + /* turn on 64-bit mode */ bl .enable_64b_mode - /* get the TOC pointer (real address) */ + /* get a valid TOC pointer, wherever we're mapped at */ bl .relative_toc +#ifdef CONFIG_PPC_BOOK3E + /* Book3E initialization */ + mr r3,r24 + mr r4,r25 + bl .book3e_secondary_core_init +#endif + +generic_secondary_common_init: /* Set up a paca value for this processor. Since we have the * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. @@ -196,6 +226,11 @@ _GLOBAL(generic_secondary_smp_init) b .kexec_wait /* next kernel might do better */ 2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */ +#ifdef CONFIG_PPC_BOOK3E + addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ + mtspr SPRN_SPRG_TLB_EXFRAME,r12 +#endif + /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 3: HMT_LOW @@ -231,6 +266,7 @@ _GLOBAL(generic_secondary_smp_init) * Turn the MMU off. * Assumes we're mapped EA == RA if the MMU is on. */ +#ifdef CONFIG_PPC_BOOK3S _STATIC(__mmu_off) mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR @@ -242,6 +278,7 @@ _STATIC(__mmu_off) sync rfid b . 
/* prevent speculative execution */ +#endif /* @@ -279,6 +316,10 @@ _GLOBAL(__start_initialization_multiplatform) mr r31,r3 mr r30,r4 +#ifdef CONFIG_PPC_BOOK3E + bl .start_initialization_book3e + b .__after_prom_start +#else /* Setup some critical 970 SPRs before switching MMU off */ mfspr r0,SPRN_PVR srwi r0,r0,16 @@ -296,6 +337,7 @@ _GLOBAL(__start_initialization_multiplatform) /* Switch off MMU if not already off */ bl .__mmu_off b .__after_prom_start +#endif /* CONFIG_PPC_BOOK3E */ _INIT_STATIC(__boot_from_prom) #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE @@ -358,10 +400,16 @@ _STATIC(__after_prom_start) * Note: This process overwrites the OF exception vectors. */ li r3,0 /* target addr */ +#ifdef CONFIG_PPC_BOOK3E + tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */ +#endif mr. r4,r26 /* In some cases the loader may */ beq 9f /* have already put us at zero */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. */ +#ifdef CONFIG_PPC_BOOK3E + tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */ +#endif #ifdef CONFIG_CRASH_DUMP /* @@ -507,6 +555,9 @@ _GLOBAL(pmac_secondary_start) * r13 = paca virtual address * SPRG_PACA = paca virtual address */ + .section ".text"; + .align 2 ; + .globl __secondary_start __secondary_start: /* Set thread priority to MEDIUM */ @@ -543,7 +594,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - rfid + RFI b . /* prevent speculative execution */ /* @@ -564,11 +615,16 @@ _GLOBAL(start_secondary_prolog) */ _GLOBAL(enable_64b_mode) mfmsr r11 /* grab the current MSR */ +#ifdef CONFIG_PPC_BOOK3E + oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ + mtmsr r11 +#else /* CONFIG_PPC_BOOK3E */ li r12,(MSR_SF | MSR_ISF)@highest sldi r12,r12,48 or r11,r11,r12 mtmsrd r11 isync +#endif blr /* @@ -612,9 +668,11 @@ _INIT_STATIC(start_here_multiplatform) bdnz 3b 4: +#ifndef CONFIG_PPC_BOOK3E mfmsr r6 ori r6,r6,MSR_RI mtmsrd r6 /* RI on */ +#endif #ifdef CONFIG_RELOCATABLE /* Save the physical address we're running at in kernstart_addr */ @@ -647,7 +705,7 @@ _INIT_STATIC(start_here_multiplatform) ld r4,PACAKMSR(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - rfid + RFI b . /* prevent speculative execution */ /* This is where all platforms converge execution */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 65aced7b833a..87df51720641 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -454,6 +454,24 @@ static void __init irqstack_early_init(void) #define irqstack_early_init() #endif +#ifdef CONFIG_PPC_BOOK3E +static void __init exc_lvl_early_init(void) +{ + unsigned int i; + + for_each_possible_cpu(i) { + critirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + dbgirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + mcheckirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + } +} +#else +#define exc_lvl_early_init() +#endif + /* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. @@ -513,6 +531,7 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = klimit; irqstack_early_init(); + exc_lvl_early_init(); emergency_stack_init(); #ifdef CONFIG_PPC_STD_MMU_64 -- cgit From 20d70345f181be6bdd5b0a76a408d0693683bf3d Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Wed, 29 Jul 2009 07:04:46 +0000 Subject: powerpc: Add AMCC 460EX/460GT Rev. 
B support to cputable.c Signed-off-by: Stefan Roese Signed-off-by: Josh Boyer Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f34ea37079b5..9f38ecb17859 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1653,7 +1653,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc440", }, { /* 460EX */ - .pvr_mask = 0xffff0002, + .pvr_mask = 0xffff0006, .pvr_value = 0x13020002, .cpu_name = "460EX", .cpu_features = CPU_FTRS_440x6, @@ -1665,8 +1665,21 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 460EX Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020004, + .cpu_name = "460EX Rev. B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, { /* 460GT */ - .pvr_mask = 0xffff0002, + .pvr_mask = 0xffff0006, .pvr_value = 0x13020000, .cpu_name = "460GT", .cpu_features = CPU_FTRS_440x6, @@ -1678,6 +1691,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 460GT Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020005, + .cpu_name = "460GT Rev. B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, { /* 460SX */ .pvr_mask = 0xffffff00, .pvr_value = 0x13541800, -- cgit From cf68787b68a2011664f1670a827c8f202ddb7c3d Mon Sep 17 00:00:00 2001 From: Benjamin Krill Date: Mon, 27 Jul 2009 22:02:39 +0000 Subject: powerpc/prom_init: Evaluate mem kernel parameter for early allocation Evaluate mem kernel parameter for early memory allocations. If mem is set no allocation in the region above the given boundary is allowed. The current code doesn't take care about this and allocate memory above the given mem boundary. Signed-off-by: Benjamin Krill Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 103 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index d942404779c1..864334b337a3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -190,6 +190,8 @@ static int __initdata of_platform; static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; +static unsigned long __initdata prom_memory_limit; + static unsigned long __initdata alloc_top; static unsigned long __initdata alloc_top_high; static unsigned long __initdata alloc_bottom; @@ -484,6 +486,67 @@ static int __init prom_setprop(phandle node, const char *nodename, return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); } +/* We can't use the standard versions because of RELOC headaches. 
*/ +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) + +#define isdigit(c) ('0' <= (c) && (c) <= '9') +#define islower(c) ('a' <= (c) && (c) <= 'z') +#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) + +unsigned long prom_strtoul(const char *cp, const char **endp) +{ + unsigned long result = 0, base = 10, value; + + if (*cp == '0') { + base = 8; + cp++; + if (toupper(*cp) == 'X') { + cp++; + base = 16; + } + } + + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + result = result * base + value; + cp++; + } + + if (endp) + *endp = cp; + + return result; +} + +unsigned long prom_memparse(const char *ptr, const char **retptr) +{ + unsigned long ret = prom_strtoul(ptr, retptr); + int shift = 0; + + /* + * We can't use a switch here because GCC *may* generate a + * jump table which won't work, because we're not running at + * the address we're linked at. + */ + if ('G' == **retptr || 'g' == **retptr) + shift = 30; + + if ('M' == **retptr || 'm' == **retptr) + shift = 20; + + if ('K' == **retptr || 'k' == **retptr) + shift = 10; + + if (shift) { + ret <<= shift; + (*retptr)++; + } + + return ret; +} + /* * Early parsing of the command line passed to the kernel, used for * "mem=x" and the options that affect the iommu @@ -491,9 +554,8 @@ static int __init prom_setprop(phandle node, const char *nodename, static void __init early_cmdline_parse(void) { struct prom_t *_prom = &RELOC(prom); -#ifdef CONFIG_PPC64 const char *opt; -#endif + char *p; int l = 0; @@ -521,6 +583,15 @@ static void __init early_cmdline_parse(void) RELOC(prom_iommu_force_on) = 1; } #endif + opt = strstr(RELOC(prom_cmd_line), RELOC("mem=")); + if (opt) { + opt += 4; + RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt); +#ifdef CONFIG_PPC64 + /* Align to 16 MB == size of ppc64 large page */ + RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); +#endif + } } #ifdef CONFIG_PPC_PSERIES @@ -1026,6 +1097,29 @@ static void __init prom_init_mem(void) RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); } + /* + * If prom_memory_limit is set we reduce the upper limits *except* for + * alloc_top_high. This must be the real top of RAM so we can put + * TCE's up there. + */ + + RELOC(alloc_top_high) = RELOC(ram_top); + + if (RELOC(prom_memory_limit)) { + if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) { + prom_printf("Ignoring mem=%x <= alloc_bottom.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) { + prom_printf("Ignoring mem=%x >= ram_top.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else { + RELOC(ram_top) = RELOC(prom_memory_limit); + RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit)); + } + } + /* * Setup our top alloc point, that is top of RMO or top of * segment 0 when running non-LPAR. 
@@ -1041,6 +1135,7 @@ static void __init prom_init_mem(void) RELOC(alloc_top_high) = RELOC(ram_top); prom_printf("memory layout at init:\n"); + prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); prom_printf(" alloc_top : %x\n", RELOC(alloc_top)); prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); @@ -2395,6 +2490,10 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * Fill in some infos for use by the kernel later on */ + if (RELOC(prom_memory_limit)) + prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit", + &RELOC(prom_memory_limit), + sizeof(prom_memory_limit)); #ifdef CONFIG_PPC64 if (RELOC(prom_iommu_off)) prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off", -- cgit From 14ea58ad797e4e9b7be755aca0fd3925d0713ede Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 1 Aug 2009 22:48:27 +0000 Subject: powerpc: Use DIV_ROUND_CLOSEST in time init code The kernel.h macro DIV_ROUND_CLOSEST performs the computation (x + d/2)/d but is perhaps more readable. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @haskernel@ @@ #include @depends on haskernel@ expression x,__divisor; @@ - (((x) + ((__divisor) / 2)) / (__divisor)) + DIV_ROUND_CLOSEST(x,__divisor) // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..edb1edb36469 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -479,7 +479,8 @@ static int __init iSeries_tb_recal(void) unsigned long tb_ticks = tb - iSeries_recal_tb; unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12; unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec; - unsigned long new_tb_ticks_per_jiffy = (new_tb_ticks_per_sec+(HZ/2))/HZ; + unsigned long new_tb_ticks_per_jiffy = + DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ); long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy; char sign = '+'; /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */ -- cgit From a15098c90df1ac2b1bfe1d33dd1c47063213aa9a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 9 Aug 2009 19:02:51 +0000 Subject: powerpc: Enable GCOV Make it possible to enable GCOV code coverage measurement on powerpc. Lightly tested on 64-bit, seems to work as expected. 
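As a usage note, assuming the generic gcov-kernel infrastructure that this hooks into: coverage is enabled with CONFIG_GCOV_KERNEL=y (plus CONFIG_GCOV_PROFILE_ALL=y to instrument the whole kernel), results are read back through debugfs under /sys/kernel/debug/gcov/, and the opt-outs below follow the standard kbuild pattern:

	GCOV_PROFILE_foo.o := n		# exclude a single object, as done for prom_init.o etc.
	GCOV_PROFILE := n		# exclude everything built by a Makefile, as in the VDSO Makefiles
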
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 7 +++++++ arch/powerpc/kernel/vdso32/Makefile | 1 + arch/powerpc/kernel/vdso64/Makefile | 2 ++ 3 files changed, 10 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 035946f9d5fb..720e8c3b8e94 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -115,6 +115,13 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +# Disable GCOV in odd or sensitive code +GCOV_PROFILE_prom_init.o := n +GCOV_PROFILE_ftrace.o := n +GCOV_PROFILE_machine_kexec_64.o := n +GCOV_PROFILE_machine_kexec_32.o := n +GCOV_PROFILE_kprobes.o := n + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index c3d57bd01a88..b54b81688132 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -12,6 +12,7 @@ endif targets := $(obj-vdso32) vdso32.so vdso32.so.dbg obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +GCOV_PROFILE := n EXTRA_CFLAGS := -shared -fno-common -fno-builtin EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index fa7f1b8f3e50..dd0c8e936775 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -7,6 +7,8 @@ obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o targets := $(obj-vdso64) vdso64.so vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +GCOV_PROFILE := n + EXTRA_CFLAGS := -shared -fno-common -fno-builtin EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ $(call ld-option, -Wl$(comma)--hash-style=sysv) -- cgit From 903444e4297264ec0959e886695911659edb425c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 9 Aug 2009 19:06:24 +0000 Subject: powerpc/vmlinux.lds: Move _edata down Currently _edata does not include several data sections, this causes the kernel's report of memory usage at boot to not match reality, and also prevents kmemleak from working - because it scan between _sdata and _edata for pointers to allocated memory. This mirrors a similar change made recently to the x86 linker script. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vmlinux.lds.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8ef8a14abc95..3bb09975e342 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -245,10 +245,6 @@ SECTIONS } #endif - . = ALIGN(PAGE_SIZE); - _edata = .; - PROVIDE32 (edata = .); - /* The initial task and kernel stack */ #ifdef CONFIG_PPC32 . = ALIGN(8192); @@ -282,6 +278,10 @@ SECTIONS __nosave_end = .; } + . 
= ALIGN(PAGE_SIZE); + _edata = .; + PROVIDE32 (edata = .); + /* * And finally the bss */ -- cgit From d90246cd8e0141332a8ab09c3c1800cc2028a686 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sat, 22 Aug 2009 22:23:13 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock(), build fix Fix the following build problem on powerpc: arch/powerpc/kernel/time.c: In function 'read_persistent_clock': arch/powerpc/kernel/time.c:788: error: 'return' with a value, in function returning void arch/powerpc/kernel/time.c:791: error: 'return' with a value, in function returning void Reported-by: Ingo Molnar Signed-off-by: Martin Schwidefsky Cc: dwalker@fifo99.com Cc: johnstul@us.ibm.com LKML-Reference: <20090822222313.74b9619c@skybase> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/time.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ad63f30fe3da..a508388fb87c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -774,6 +774,7 @@ void read_persistent_clock(struct timespec *ts) struct rtc_time tm; static int first = 1; + ts->tv_nsec = 0; /* XXX this is a litle fragile but will work okay in the short term */ if (first) { first = 0; @@ -781,15 +782,18 @@ void read_persistent_clock(struct timespec *ts) timezone_offset = ppc_md.time_init(); /* get_boot_time() isn't guaranteed to be safe to call late */ - if (ppc_md.get_boot_time) - return ppc_md.get_boot_time() -timezone_offset; + if (ppc_md.get_boot_time) { + ts->tv_sec = ppc_md.get_boot_time() - timezone_offset; + return; + } + } + if (!ppc_md.get_rtc_time) { + ts->tv_sec = 0; + return; } - if (!ppc_md.get_rtc_time) - return 0; ppc_md.get_rtc_time(&tm); ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); - ts->tv_nsec = 0; } /* clocksource code */ -- cgit From ea3cc330ac0cd521ff07c7cd432a1848c19a7e92 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 18 Aug 2009 19:00:34 +0000 Subject: powerpc/mm: Cleanup handling of execute permission This is an attempt at cleaning up a bit the way we handle execute permission on powerpc. _PAGE_HWEXEC is gone, _PAGE_EXEC is now only defined by CPUs that can do something with it, and the myriad of #ifdef's in the I$/D$ coherency code is reduced to 2 cases that hopefully should cover everything. The logic on BookE is a little bit different than what it was though not by much. Since now, _PAGE_EXEC will be set by the generic code for executable pages, we need to filter out if they are unclean and recover it. However, I don't expect the code to be more bloated than it already was in that area due to that change. I could boast that this brings proper enforcing of per-page execute permissions to all BookE and 40x but in fact, we've had that now for some time as a side effect of my previous rework in that area (and I didn't even know it :-) We would only enable execute permission if the page was cache clean and we would only cache clean it if we took and exec fault. Since we now enforce that the later only work if VM_EXEC is part of the VMA flags, we de-fact already enforce per-page execute permissions... 
Unless I missed something Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_44x.S | 2 +- arch/powerpc/kernel/head_fsl_booke.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 656cfb2d6666..711368b993f2 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -497,7 +497,7 @@ tlb_44x_patch_hwater_D: mtspr SPRN_MMUCR,r12 /* Make up the required permissions */ - li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC /* Compute pgdir/pmd offset */ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index eca80482ae72..2c5af5256479 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -643,7 +643,7 @@ interrupt_base: 4: /* Make up the required permissions */ - li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC FIND_PTE andc. r13,r13,r11 /* Check permission */ @@ -742,7 +742,7 @@ finish_tlb_load: #endif mtspr SPRN_MAS2, r12 - li r10, (_PAGE_HWEXEC | _PAGE_PRESENT) + li r10, (_PAGE_EXEC | _PAGE_PRESENT) rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ -- cgit From 14d757520a08d09745c3b18bb34addd9bef56e2d Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 19 Aug 2009 04:27:53 +0000 Subject: powerpc: Fix __flush_icache_range on 44x The ptrace POKETEXT interface allows a process to modify the text pages of a child process being ptraced, usually to insert breakpoints via trap instructions. The kernel eventually calls copy_to_user_page, which in turn calls __flush_icache_range to invalidate the icache lines for the child process. However, this function does not work on 44x due to the icache being virtually indexed. This was noticed by a breakpoint being triggered after it had been cleared by ltrace on a 440EPx board. The convenient solution is to do a flash invalidate of the icache in the __flush_icache_range function. Signed-off-by: Josh Boyer Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/misc_32.S | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 15f28e0de78d..da9c0c4c10f3 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -342,10 +342,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) addi r3,r3,L1_CACHE_BYTES bdnz 1b sync /* wait for dcbst's to get to ram */ +#ifndef CONFIG_44x mtctr r4 2: icbi 0,r6 addi r6,r6,L1_CACHE_BYTES bdnz 2b +#else + /* Flash invalidate on 44x because we are passed kmapped addresses and + this doesn't work for userspace pages due to the virtually tagged + icache. Sigh. */ + iccci 0, r0 +#endif sync /* additional sync needed on g4 */ isync blr -- cgit From 6776426320e151051a16bc7bf86f12d310c9e8ca Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Tue, 23 Jun 2009 23:26:37 +0000 Subject: powerpc/pseries: Reduce the polling interval in __cpu_up() Time time taken for a single cpu online operation on a pseries machine is as follows: Dedicated LPAR (POWER6): ~220ms. Shared LPAR (POWER5) : ~240ms. Of this time, approximately 200ms is taken up by __cpu_up(). 
This is because we poll every 200ms to check if the new cpu has notified it's presence through the cpu_callin_map. We repeat this operation until the new cpu sets the value in cpu_callin_map or 5 seconds elapse, whichever comes earlier. However, using completion_structs instead of polling loops, the time taken by the new processor to indicate it's presence has found to be less than 1ms on pseries. This method however may not work on all powerpc platforms due to the time-base synchronization code. Keeping this in mind, we could reduce msleep polling interval from 200ms to 1ms while retaining the 5 second timeout. With this, the time taken for a cpu online operation changes as follows: Dedicated LPAR (POWER6): 20-25ms. Shared LPAR (POWER5) : 60-80ms. In both these cases, it was found that the code polls through the loop only once indicating that 1ms is a reasonable value, atleast on pseries. The code needs testing on other powerpc platforms. Signed-off-by: Gautham R Shenoy Acked-by: Joel Schopp Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0b47de07302d..96f107cc0160 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -412,9 +412,8 @@ int __cpuinit __cpu_up(unsigned int cpu) * CPUs can take much longer to come up in the * hotplug case. Wait five seconds. */ - for (c = 25; c && !cpu_callin_map[cpu]; c--) { - msleep(200); - } + for (c = 5000; c && !cpu_callin_map[cpu]; c--) + msleep(1); #endif if (!cpu_callin_map[cpu]) { -- cgit From 762afb7317b1987fa0851135fe4f2947f68c3c2a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:22 +0000 Subject: powerpc: Remove addr_needs_map in struct dma_mapping_ops This patch adds max_direct_dma_addr to struct dev_archdata to remove addr_needs_map in struct dma_mapping_ops. It also converts dma_capable() to use max_direct_dma_addr. max_direct_dma_addr is initialized in pci_dma_dev_setup_swiotlb(), called via ppc_md.pci_dma_dev_setup hook. For further information: http://marc.info/?t=124719060200001&r=1&w=2 Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-swiotlb.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index e8a57de85bcf..c9f6a302e879 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,26 +24,6 @@ int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; -/* - * Determine if an address is reachable by a pci device, or if we must bounce. - */ -static int -swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) -{ - dma_addr_t max; - struct pci_controller *hose; - struct pci_dev *pdev = to_pci_dev(hwdev); - - hose = pci_bus_to_host(pdev->bus); - max = hose->dma_window_base_cur + hose->dma_window_size; - - /* check that we're within mapped pci window space */ - if ((addr + size > max) | (addr < hose->dma_window_base_cur)) - return 1; - - return 0; -} - /* * At the moment, all platforms that use this code only require * swiotlb to be used if we're operating on HIGHMEM. 
Since @@ -73,22 +53,36 @@ struct dma_mapping_ops swiotlb_pci_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, - .addr_needs_map = swiotlb_pci_addr_needs_map, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device }; +void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) +{ + struct pci_controller *hose; + struct dev_archdata *sd; + + hose = pci_bus_to_host(pdev->bus); + sd = &pdev->dev.archdata; + sd->max_direct_dma_addr = + hose->dma_window_base_cur + hose->dma_window_size; +} + static int ppc_swiotlb_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; + struct dev_archdata *sd; /* We are only intereted in device addition */ if (action != BUS_NOTIFY_ADD_DEVICE) return 0; + sd = &dev->archdata; + sd->max_direct_dma_addr = 0; + /* May need to bounce if the device can't address all of DRAM */ if (dma_get_mask(dev) < lmb_end_of_DRAM()) set_dma_ops(dev, &swiotlb_dma_ops); -- cgit From 3702977fa7d1a1a95caa387121fa7c9f4cae35f3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:23 +0000 Subject: powerpc: Remove swiotlb_pci_dma_ops Now swiotlb_pci_dma_ops is identical to swiotlb_dma_ops; we can use swiotlb_dma_ops with any devices. This removes swiotlb_pci_dma_ops. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-swiotlb.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index c9f6a302e879..ca141e108ae3 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -45,20 +45,6 @@ struct dma_mapping_ops swiotlb_dma_ops = { .sync_sg_for_device = swiotlb_sync_sg_for_device }; -struct dma_mapping_ops swiotlb_pci_dma_ops = { - .alloc_coherent = dma_direct_alloc_coherent, - .free_coherent = dma_direct_free_coherent, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .dma_supported = swiotlb_dma_supported, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, - .sync_single_range_for_device = swiotlb_sync_single_range_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device -}; - void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) { struct pci_controller *hose; -- cgit From 45223c549273bbb2c6e1bc6e3629174e8765ad01 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:25 +0000 Subject: powerpc: use dma_map_ops struct This converts uses dma_map_ops struct (in include/linux/dma-mapping.h) instead of POWERPC homegrown dma_mapping_ops. 
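With the generic structure in place, each device carries a pointer to its dma_map_ops and the DMA API entry points dispatch through it. A rough sketch of the idea (using the get_dma_ops()/set_dma_ops() accessors that already exist on powerpc; this is illustrative, not code from this patch):

    struct dma_map_ops *ops = get_dma_ops(dev);   /* e.g. &dma_direct_ops,
                                                    * &dma_iommu_ops or
                                                    * &swiotlb_dma_ops */

    /* dma_map_page(), dma_map_sg() and friends end up in ops->map_page(),
     * ops->map_sg(), ... -- whatever set_dma_ops() installed for the
     * device earlier in this series. */
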
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/kernel/dma-swiotlb.c | 2 +- arch/powerpc/kernel/dma.c | 2 +- arch/powerpc/kernel/ibmebus.c | 2 +- arch/powerpc/kernel/pci-common.c | 6 +++--- arch/powerpc/kernel/vio.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 2983adac8cc3..87ddb3fb948c 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -89,7 +89,7 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 1; } -struct dma_mapping_ops dma_iommu_ops = { +struct dma_map_ops dma_iommu_ops = { .alloc_coherent = dma_iommu_alloc_coherent, .free_coherent = dma_iommu_free_coherent, .map_sg = dma_iommu_map_sg, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index ca141e108ae3..d1143a68d82a 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -31,7 +31,7 @@ unsigned int ppc_swiotlb_enable; * map_page, and unmap_page on highmem, use normal dma_ops * for everything else. */ -struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = dma_direct_alloc_coherent, .free_coherent = dma_direct_free_coherent, .map_sg = swiotlb_map_sg_attrs, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index ccf129d47d84..c61f70e145ad 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -140,7 +140,7 @@ static inline void dma_direct_sync_single_range(struct device *dev, } #endif -struct dma_mapping_ops dma_direct_ops = { +struct dma_map_ops dma_direct_ops = { .alloc_coherent = dma_direct_alloc_coherent, .free_coherent = dma_direct_free_coherent, .map_sg = dma_direct_map_sg, diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 6e3f62493659..a4c8b38b0ba1 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -127,7 +127,7 @@ static int ibmebus_dma_supported(struct device *dev, u64 mask) return 1; } -static struct dma_mapping_ops ibmebus_dma_ops = { +static struct dma_map_ops ibmebus_dma_ops = { .alloc_coherent = ibmebus_alloc_coherent, .free_coherent = ibmebus_free_coherent, .map_sg = ibmebus_map_sg, diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 5a56e97c5ac0..7585f1fc26db 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -50,14 +50,14 @@ resource_size_t isa_mem_base; unsigned int ppc_pci_flags = 0; -static struct dma_mapping_ops *pci_dma_ops = &dma_direct_ops; +static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; -void set_pci_dma_ops(struct dma_mapping_ops *dma_ops) +void set_pci_dma_ops(struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } -struct dma_mapping_ops *get_pci_dma_ops(void) +struct dma_map_ops *get_pci_dma_ops(void) { return pci_dma_ops; } diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 819e59f6f7c7..bc7b41edbdfc 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -601,7 +601,7 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, vio_cmo_dealloc(viodev, alloc_size); } -struct dma_mapping_ops vio_dma_mapping_ops = { +struct dma_map_ops vio_dma_mapping_ops = { .alloc_coherent = vio_dma_iommu_alloc_coherent, .free_coherent = vio_dma_iommu_free_coherent, .map_sg = vio_dma_iommu_map_sg, -- cgit From 
4a9a6bfe707cfe5bcb0a20eabe240293a095cd10 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:27 +0000 Subject: powerpc: Handle SWIOTLB mapping error properly Signed-off-by: FUJITA Tomonori Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-swiotlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index d1143a68d82a..e96cbbd9b449 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -42,7 +42,8 @@ struct dma_map_ops swiotlb_dma_ops = { .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .mapping_error = swiotlb_dma_mapping_error, }; void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) -- cgit From 80d3e8abb73dad3983fef2597b52cab8fbcd876b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:28 +0000 Subject: powerpc: Add CONFIG_DMA_API_DEBUG support Signed-off-by: FUJITA Tomonori Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index c61f70e145ad..21b784d7e7d0 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -156,3 +157,13 @@ struct dma_map_ops dma_direct_ops = { #endif }; EXPORT_SYMBOL(dma_direct_ops); + +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) + +static int __init dma_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; +} +fs_initcall(dma_init); -- cgit From f45c4486f70d0a6502e7499a8664cdc0bba84cd2 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 18 Aug 2009 19:08:30 +0000 Subject: powerpc/book3e-64: Move the default cpu table entry Move the default cpu entry table for CONFIG_PPC_BOOK3E_64 to the very end since we will probably want to support both 32-bit and 64-bit kernels for some processors that are higher up in the list. 
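The ordering matters because the CPU table is matched first-hit. A minimal sketch of that kind of lookup (illustrative only; example_spec and lookup_cpu() are made-up names, not the real identification code in cputable.c) shows why a catch-all entry with pvr_mask == 0 has to sit last, since it matches every PVR:

    struct example_spec {
            unsigned int pvr_mask;
            unsigned int pvr_value;
            const char  *cpu_name;
    };

    static const struct example_spec *
    lookup_cpu(const struct example_spec *table, int n, unsigned int pvr)
    {
            int i;

            /* The first entry whose masked PVR matches wins. */
            for (i = 0; i < n; i++)
                    if ((pvr & table[i].pvr_mask) == table[i].pvr_value)
                            return &table[i];
            return NULL;
    }

With the default Book3E entry moved to the end of cpu_specs[], a more specific 32-bit or 64-bit entry placed earlier in the table can still be selected.
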
Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 49 +++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9f38ecb17859..0b9c9135922e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -89,8 +89,12 @@ extern void __restore_cpu_power7(void); #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) +#ifdef CONFIG_PPC_BOOK3E_64 +#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) +#else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) +#endif static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC_BOOK3S_64 @@ -509,28 +513,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "power4", } #endif /* CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_PPC_BOOK3E_64 - { /* This is a default entry to get going, to be replaced by - * a real one at some stage - */ -#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "Book3E", - .cpu_features = CPU_FTRS_BASE_BOOK3E, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | - MMU_FTR_USE_TLBIVAX_BCAST | - MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .machine_check = machine_check_generic, - .platform = "power6", - }, -#endif #ifdef CONFIG_PPC32 #if CLASSIC_PPC @@ -1846,6 +1828,29 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_E500 */ #endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC_BOOK3E_64 + { /* This is a default entry to get going, to be replaced by + * a real one at some stage + */ +#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ + CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "Book3E", + .cpu_features = CPU_FTRS_BASE_BOOK3E, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | + MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .machine_check = machine_check_generic, + .platform = "power6", + }, +#endif }; static struct cpu_spec the_cpu_spec; -- cgit From 6c188829d2c20a1d02aedb13db34b3ca2a8f0dc4 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 18 Aug 2009 19:08:31 +0000 Subject: powerpc/book3e-64: Wait til generic_calibrate_decr to enable decrementer Match what we do on 32-bit Book-E processors and enable the decrementer in generic_calibrate_decr. We need to make sure we disable the decrementer early in boot since we currently use lazy (soft) interrupt on 64-bit Book-E and possible get a decrementer exception before we are ready for it. 
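In C terms, the new init_thread_book3e sequence in the exceptions-64e.S hunk below amounts to roughly the following (a paraphrase using the usual mtspr()/mfspr() helpers, not code from the patch); it relies on the Book-E TSR status bits being write-one-to-clear:

    mtspr(SPRN_TCR, 0);                  /* disable decrementer, FIT and watchdog */
    mtspr(SPRN_TSR, mfspr(SPRN_TSR));    /* write pending status back to clear it */

The decrementer is then only enabled later, from generic_calibrate_decr() (and, with a later patch in this series, start_cpu_decrementer()), once the kernel is ready to take the interrupt.
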
Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 695d4847d228..3611b0e7d46d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -774,9 +774,11 @@ _STATIC(init_thread_book3e) /* Make sure interrupts are off */ wrteei 0 - /* disable watchdog and FIT and enable DEC interrupts */ - lis r3,TCR_DIE@h + /* disable all timers and clear out status */ + li r3,0 mtspr SPRN_TCR,r3 + mfspr r3,SPRN_TSR + mtspr SPRN_TSR,r3 blr -- cgit From 4b98d9e713a03bd79ced8800e24a56359f9effbf Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 18 Aug 2009 19:08:32 +0000 Subject: powerpc/book3e-64: Add helper function to setup IVORs Not all 64-bit Book-3E parts will have fixed IVORs so add a function that cpusetup code can call to setup the base IVORs (0..15) to match the fixed offsets. We need to 'or' part of interrupt_base_book3e into the IVORs since on parts that have them the IVPR doesn't extend as far down. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 3611b0e7d46d..662236c72244 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -782,5 +782,24 @@ _STATIC(init_thread_book3e) blr +_GLOBAL(__setup_base_ivors) + SET_IVOR(0, 0x020) /* Critical Input */ + SET_IVOR(1, 0x000) /* Machine Check */ + SET_IVOR(2, 0x060) /* Data Storage */ + SET_IVOR(3, 0x080) /* Instruction Storage */ + SET_IVOR(4, 0x0a0) /* External Input */ + SET_IVOR(5, 0x0c0) /* Alignment */ + SET_IVOR(6, 0x0e0) /* Program */ + SET_IVOR(7, 0x100) /* FP Unavailable */ + SET_IVOR(8, 0x120) /* System Call */ + SET_IVOR(9, 0x140) /* Auxiliary Processor Unavailable */ + SET_IVOR(10, 0x160) /* Decrementer */ + SET_IVOR(11, 0x180) /* Fixed Interval Timer */ + SET_IVOR(12, 0x1a0) /* Watchdog Timer */ + SET_IVOR(13, 0x1c0) /* Data TLB Error */ + SET_IVOR(14, 0x1e0) /* Instruction TLB Error */ + SET_IVOR(15, 0x040) /* Debug */ + sync + blr -- cgit From bb1af71ecbfdbecbe9f7e43f703da5840b76c2e4 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 18 Aug 2009 19:08:33 +0000 Subject: powerpc/book3e-64: Add support to initial_tlb_book3e for non-HES TLB We now search through TLBnCFG looking for the first array that has IPROT support (we assume that there is only one). If that TLB has hardware entry select (HES) support we use the existing code and with the proper TLB select (the HES code still needs to clean up bolted entries from firmware). The non-HES code is pretty similiar to the 32-bit FSL Book-E code but does make some new assumtions (like that we have tlbilx) and simplifies things down a bit. 
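The probe logic at the top of the new code is easier to follow in C than in the assembly below. Roughly, as an illustrative paraphrase (the real code keeps the selection in MAS0/TLBSEL form rather than an integer):

    u32 cfg;
    int tlbsel = 0;

    cfg = mfspr(SPRN_TLB0CFG);
    if (!(cfg & TLBnCFG_IPROT)) { cfg = mfspr(SPRN_TLB1CFG); tlbsel = 1; }
    if (!(cfg & TLBnCFG_IPROT)) { cfg = mfspr(SPRN_TLB2CFG); tlbsel = 2; }
    if (!(cfg & TLBnCFG_IPROT)) { cfg = mfspr(SPRN_TLB3CFG); tlbsel = 3; }

    /*
     * If the chosen array advertises TLBnCFG_HES, the bolted 1GB linear
     * mapping can be written directly (the existing path).  Otherwise the
     * new code finds the entry it is running from, invalidates everything
     * else, bounces through a temporary mapping in the other address
     * space, and only then installs the final 1GB mapping by hand.
     */
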
Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 204 ++++++++++++++++++++++++++++++++++- 1 file changed, 200 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 662236c72244..9048f96237f6 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -616,18 +616,214 @@ bad_stack_book3e: * Setup the initial TLB for a core. This current implementation * assume that whatever we are running off will not conflict with * the new mapping at PAGE_OFFSET. - * We also make various assumptions about the processor we run on, - * this might have to be made more flexible based on the content - * of MMUCFG and friends. */ _GLOBAL(initial_tlb_book3e) + /* Look for the first TLB with IPROT set */ + mfspr r4,SPRN_TLB0CFG + andi. r3,r4,TLBnCFG_IPROT + lis r3,MAS0_TLBSEL(0)@h + bne found_iprot + + mfspr r4,SPRN_TLB1CFG + andi. r3,r4,TLBnCFG_IPROT + lis r3,MAS0_TLBSEL(1)@h + bne found_iprot + + mfspr r4,SPRN_TLB2CFG + andi. r3,r4,TLBnCFG_IPROT + lis r3,MAS0_TLBSEL(2)@h + bne found_iprot + + lis r3,MAS0_TLBSEL(3)@h + mfspr r4,SPRN_TLB3CFG + /* fall through */ + +found_iprot: + andi. r5,r4,TLBnCFG_HES + bne have_hes + + mflr r8 /* save LR */ +/* 1. Find the index of the entry we're executing in + * + * r3 = MAS0_TLBSEL (for the iprot array) + * r4 = SPRN_TLBnCFG + */ + bl invstr /* Find our address */ +invstr: mflr r6 /* Make it accessible */ + mfmsr r7 + rlwinm r5,r7,27,31,31 /* extract MSR[IS] */ + mfspr r7,SPRN_PID + slwi r7,r7,16 + or r7,r7,r5 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID */ + + mfspr r3,SPRN_MAS0 + rlwinm r5,r3,16,20,31 /* Extract MAS0(Entry) */ + + mfspr r7,SPRN_MAS1 /* Insure IPROT set */ + oris r7,r7,MAS1_IPROT@h + mtspr SPRN_MAS1,r7 + tlbwe + +/* 2. Invalidate all entries except the entry we're executing in + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in + * r4 = SPRN_TLBnCFG + * r5 = ESEL of entry we are running in + */ + andi. r4,r4,TLBnCFG_N_ENTRY /* Extract # entries */ + li r6,0 /* Set Entry counter to 0 */ +1: mr r7,r3 /* Set MAS0(TLBSEL) */ + rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r7,SPRN_MAS1 + rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */ + cmpw r5,r6 + beq skpinv /* Dont update the current execution TLB */ + mtspr SPRN_MAS1,r7 + tlbwe + isync +skpinv: addi r6,r6,1 /* Increment */ + cmpw r6,r4 /* Are we done? */ + bne 1b /* If not, repeat */ + + /* Invalidate all TLBs */ + PPC_TLBILX_ALL(0,0) + sync + isync + +/* 3. Setup a temp mapping and jump to it + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in + * r5 = ESEL of entry we are running in + */ + andi. r7,r5,0x1 /* Find an entry not used and is non-zero */ + addi r7,r7,0x1 + mr r4,r3 /* Set MAS0(TLBSEL) = 1 */ + mtspr SPRN_MAS0,r4 + tlbre + + rlwimi r4,r7,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r7) */ + mtspr SPRN_MAS0,r4 + + mfspr r7,SPRN_MAS1 + xori r6,r7,MAS1_TS /* Setup TMP mapping in the other Address space */ + mtspr SPRN_MAS1,r6 + + tlbwe + + mfmsr r6 + xori r6,r6,MSR_IS + mtspr SPRN_SRR1,r6 + bl 1f /* Find our address */ +1: mflr r6 + addi r6,r6,(2f - 1b) + mtspr SPRN_SRR0,r6 + rfi +2: + +/* 4. Clear out PIDs & Search info + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + * r5 = MAS3 + */ + li r6,0 + mtspr SPRN_MAS6,r6 + mtspr SPRN_PID,r6 + +/* 5. 
Invalidate mapping we started in + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + * r5 = MAS3 + */ + mtspr SPRN_MAS0,r3 + tlbre + mfspr r6,SPRN_MAS1 + rlwinm r6,r6,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r6 + tlbwe + + /* Invalidate TLB1 */ + PPC_TLBILX_ALL(0,0) + sync + isync + +/* The mapping only needs to be cache-coherent on SMP */ +#ifdef CONFIG_SMP +#define M_IF_SMP MAS2_M +#else +#define M_IF_SMP 0 +#endif + +/* 6. Setup KERNELBASE mapping in TLB[0] + * + * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + * r5 = MAS3 + */ + rlwinm r3,r3,0,16,3 /* clear ESEL */ + mtspr SPRN_MAS0,r3 + lis r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l + mtspr SPRN_MAS1,r6 + + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_SMP) + mtspr SPRN_MAS2,r6 + + rlwinm r5,r5,0,0,25 + ori r5,r5,MAS3_SR | MAS3_SW | MAS3_SX + mtspr SPRN_MAS3,r5 + li r5,-1 + rlwinm r5,r5,0,0,25 + + tlbwe + +/* 7. Jump to KERNELBASE mapping + * + * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping + */ + /* Now we branch the new virtual address mapped by this entry */ + LOAD_REG_IMMEDIATE(r6,2f) + lis r7,MSR_KERNEL@h + ori r7,r7,MSR_KERNEL@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r7 + rfi /* start execution out of TLB1[0] entry */ +2: + +/* 8. Clear out the temp mapping + * + * r4 = MAS0 w/TLBSEL & ESEL for the entry we are running in + */ + mtspr SPRN_MAS0,r4 + tlbre + mfspr r5,SPRN_MAS1 + rlwinm r5,r5,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r5 + tlbwe + + /* Invalidate TLB1 */ + PPC_TLBILX_ALL(0,0) + sync + isync + + /* We translate LR and return */ + tovirt(r8,r8) + mtlr r8 + blr + +have_hes: /* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the * kernel linear mapping. We also set MAS8 once for all here though * that will have to be made dependent on whether we are running under * a hypervisor I suppose. */ - li r3,MAS0_HES | MAS0_WQ_ALLWAYS + ori r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS mtspr SPRN_MAS0,r3 lis r3,(MAS1_VALID | MAS1_IPROT)@h ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT -- cgit From ae14e13a4c8bb091dfd5606fd76c9cd272090ab7 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 25 Aug 2009 20:07:02 +0000 Subject: powerpc/pci: Remove dead checks for CONFIG_PPC_OF PPC_OF is always selected for arch/powerpc. This patch removes the stale #defines Signed-off-by: Grant Likely Acked-by: Stephen Rothwell Acked-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 8 -------- arch/powerpc/kernel/pci_32.c | 9 --------- 2 files changed, 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 7585f1fc26db..158a78ae6341 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -176,8 +176,6 @@ int pci_domain_nr(struct pci_bus *bus) } EXPORT_SYMBOL(pci_domain_nr); -#ifdef CONFIG_PPC_OF - /* This routine is meant to be used early during boot, when the * PCI bus numbers have not yet been assigned, and you need to * issue PCI config cycles to an OF device. 
@@ -210,17 +208,11 @@ static ssize_t pci_show_devspec(struct device *dev, return sprintf(buf, "%s", np->full_name); } static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); -#endif /* CONFIG_PPC_OF */ /* Add sysfs properties */ int pcibios_add_platform_entries(struct pci_dev *pdev) { -#ifdef CONFIG_PPC_OF return device_create_file(&pdev->dev, &dev_attr_devspec); -#else - return 0; -#endif /* CONFIG_PPC_OF */ - } char __devinit *pcibios_setup(char *str) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 3ae1c666ff92..1e807fe7ad2c 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -34,9 +34,7 @@ int pcibios_assign_bus_offset = 1; void pcibios_make_OF_bus_map(void); static void fixup_cpc710_pci64(struct pci_dev* dev); -#ifdef CONFIG_PPC_OF static u8* pci_to_OF_bus_map; -#endif /* By default, we don't re-assign bus numbers. We do this only on * some pmacs @@ -83,7 +81,6 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); -#ifdef CONFIG_PPC_OF /* * Functions below are used on OpenFirmware machines. */ @@ -357,12 +354,6 @@ pci_create_OF_bus_map(void) } } -#else /* CONFIG_PPC_OF */ -void pcibios_make_OF_bus_map(void) -{ -} -#endif /* CONFIG_PPC_OF */ - static void __devinit pcibios_scan_phb(struct pci_controller *hose) { struct pci_bus *bus; -- cgit From fbe65447197789a3ccccc27755956f6a4c445089 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 25 Aug 2009 20:07:11 +0000 Subject: powerpc/pci: move pci_64.c device tree scanning code into pci-common.c The PCI device tree scanning code in pci_64.c is some useful functionality. It allows PCI devices to be described in the device tree instead of being probed for, which in turn allows pci devices to use all of the device tree facilities to describe complex PCI bus architectures like GPIO and IRQ routing (perhaps not a common situation for desktop or server systems, but useful for embedded systems with on-board PCI devices). This patch moves the device tree scanning into pci-common.c so it is available for 32-bit powerpc machines too. 
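A board port that wants this would describe its on-board PCI devices in the device tree and ask for the device-tree probe path. A minimal, hypothetical example using the existing ppc_md.pci_probe_mode hook (the board name is made up, and the decision could equally be made per bus):

    static int myboard_pci_probe_mode(struct pci_bus *bus)
    {
            /* Trust the device tree instead of probing config space. */
            return PCI_PROBE_DEVTREE;
    }

    /* ... wired up via the board's machine description:
     *     .pci_probe_mode = myboard_pci_probe_mode,
     */
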
Signed-off-by: Grant Likely Acked-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/pci-common.c | 1 - arch/powerpc/kernel/pci_64.c | 289 ------------------------------ arch/powerpc/kernel/pci_of_scan.c | 358 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 359 insertions(+), 291 deletions(-) create mode 100644 arch/powerpc/kernel/pci_of_scan.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 7c83edbc2155..569f79ccd310 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -88,7 +88,7 @@ obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ - pci-common.o + pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ machine_kexec_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 158a78ae6341..725ea9144e38 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1617,4 +1617,3 @@ void __devinit pcibios_setup_phb_resources(struct pci_controller *hose) (unsigned long)hose->io_base_virt - _IO_BASE); } - diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 9e8902fa14c7..4d5b4ced7e45 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -43,295 +43,6 @@ unsigned long pci_probe_only = 1; unsigned long pci_io_base = ISA_IO_BASE; EXPORT_SYMBOL(pci_io_base); -static u32 get_int_prop(struct device_node *np, const char *name, u32 def) -{ - const u32 *prop; - int len; - - prop = of_get_property(np, name, &len); - if (prop && len >= 4) - return *prop; - return def; -} - -static unsigned int pci_parse_of_flags(u32 addr0, int bridge) -{ - unsigned int flags = 0; - - if (addr0 & 0x02000000) { - flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; - flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; - flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; - if (addr0 & 0x40000000) - flags |= IORESOURCE_PREFETCH - | PCI_BASE_ADDRESS_MEM_PREFETCH; - /* Note: We don't know whether the ROM has been left enabled - * by the firmware or not. 
We mark it as disabled (ie, we do - * not set the IORESOURCE_ROM_ENABLE flag) for now rather than - * do a config space read, it will be force-enabled if needed - */ - if (!bridge && (addr0 & 0xff) == 0x30) - flags |= IORESOURCE_READONLY; - } else if (addr0 & 0x01000000) - flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; - if (flags) - flags |= IORESOURCE_SIZEALIGN; - return flags; -} - - -static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) -{ - u64 base, size; - unsigned int flags; - struct resource *res; - const u32 *addrs; - u32 i; - int proplen; - - addrs = of_get_property(node, "assigned-addresses", &proplen); - if (!addrs) - return; - pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs); - for (; proplen >= 20; proplen -= 20, addrs += 5) { - flags = pci_parse_of_flags(addrs[0], 0); - if (!flags) - continue; - base = of_read_number(&addrs[1], 2); - size = of_read_number(&addrs[3], 2); - if (!size) - continue; - i = addrs[0] & 0xff; - pr_debug(" base: %llx, size: %llx, i: %x\n", - (unsigned long long)base, - (unsigned long long)size, i); - - if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { - res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; - } else if (i == dev->rom_base_reg) { - res = &dev->resource[PCI_ROM_RESOURCE]; - flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; - } else { - printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); - continue; - } - res->start = base; - res->end = base + size - 1; - res->flags = flags; - res->name = pci_name(dev); - } -} - -struct pci_dev *of_create_pci_dev(struct device_node *node, - struct pci_bus *bus, int devfn) -{ - struct pci_dev *dev; - const char *type; - - dev = alloc_pci_dev(); - if (!dev) - return NULL; - type = of_get_property(node, "device_type", NULL); - if (type == NULL) - type = ""; - - pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); - - dev->bus = bus; - dev->sysdata = node; - dev->dev.parent = bus->bridge; - dev->dev.bus = &pci_bus_type; - dev->devfn = devfn; - dev->multifunction = 0; /* maybe a lie? 
*/ - - dev->vendor = get_int_prop(node, "vendor-id", 0xffff); - dev->device = get_int_prop(node, "device-id", 0xffff); - dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); - dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); - - dev->cfg_size = pci_cfg_space_size(dev); - - dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus), - dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); - dev->class = get_int_prop(node, "class-code", 0); - dev->revision = get_int_prop(node, "revision-id", 0); - - pr_debug(" class: 0x%x\n", dev->class); - pr_debug(" revision: 0x%x\n", dev->revision); - - dev->current_state = 4; /* unknown power state */ - dev->error_state = pci_channel_io_normal; - dev->dma_mask = 0xffffffff; - - if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { - /* a PCI-PCI bridge */ - dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; - dev->rom_base_reg = PCI_ROM_ADDRESS1; - } else if (!strcmp(type, "cardbus")) { - dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; - } else { - dev->hdr_type = PCI_HEADER_TYPE_NORMAL; - dev->rom_base_reg = PCI_ROM_ADDRESS; - /* Maybe do a default OF mapping here */ - dev->irq = NO_IRQ; - } - - pci_parse_of_addrs(node, dev); - - pr_debug(" adding to system ...\n"); - - pci_device_add(dev, bus); - - return dev; -} -EXPORT_SYMBOL(of_create_pci_dev); - -static void __devinit __of_scan_bus(struct device_node *node, - struct pci_bus *bus, int rescan_existing) -{ - struct device_node *child; - const u32 *reg; - int reglen, devfn; - struct pci_dev *dev; - - pr_debug("of_scan_bus(%s) bus no %d... \n", - node->full_name, bus->number); - - /* Scan direct children */ - for_each_child_of_node(node, child) { - pr_debug(" * %s\n", child->full_name); - reg = of_get_property(child, "reg", ®len); - if (reg == NULL || reglen < 20) - continue; - devfn = (reg[0] >> 8) & 0xff; - - /* create a new pci_dev for this device */ - dev = of_create_pci_dev(child, bus, devfn); - if (!dev) - continue; - pr_debug(" dev header type: %x\n", dev->hdr_type); - } - - /* Apply all fixups necessary. 
We don't fixup the bus "self" - * for an existing bridge that is being rescanned - */ - if (!rescan_existing) - pcibios_setup_bus_self(bus); - pcibios_setup_bus_devices(bus); - - /* Now scan child busses */ - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { - struct device_node *child = pci_device_to_OF_node(dev); - if (dev) - of_scan_pci_bridge(child, dev); - } - } -} - -void __devinit of_scan_bus(struct device_node *node, - struct pci_bus *bus) -{ - __of_scan_bus(node, bus, 0); -} -EXPORT_SYMBOL_GPL(of_scan_bus); - -void __devinit of_rescan_bus(struct device_node *node, - struct pci_bus *bus) -{ - __of_scan_bus(node, bus, 1); -} -EXPORT_SYMBOL_GPL(of_rescan_bus); - -void __devinit of_scan_pci_bridge(struct device_node *node, - struct pci_dev *dev) -{ - struct pci_bus *bus; - const u32 *busrange, *ranges; - int len, i, mode; - struct resource *res; - unsigned int flags; - u64 size; - - pr_debug("of_scan_pci_bridge(%s)\n", node->full_name); - - /* parse bus-range property */ - busrange = of_get_property(node, "bus-range", &len); - if (busrange == NULL || len != 8) { - printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", - node->full_name); - return; - } - ranges = of_get_property(node, "ranges", &len); - if (ranges == NULL) { - printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n", - node->full_name); - return; - } - - bus = pci_add_new_bus(dev->bus, dev, busrange[0]); - if (!bus) { - printk(KERN_ERR "Failed to create pci bus for %s\n", - node->full_name); - return; - } - - bus->primary = dev->bus->number; - bus->subordinate = busrange[1]; - bus->bridge_ctl = 0; - bus->sysdata = node; - - /* parse ranges property */ - /* PCI #address-cells == 3 and #size-cells == 2 always */ - res = &dev->resource[PCI_BRIDGE_RESOURCES]; - for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) { - res->flags = 0; - bus->resource[i] = res; - ++res; - } - i = 1; - for (; len >= 32; len -= 32, ranges += 8) { - flags = pci_parse_of_flags(ranges[0], 1); - size = of_read_number(&ranges[6], 2); - if (flags == 0 || size == 0) - continue; - if (flags & IORESOURCE_IO) { - res = bus->resource[0]; - if (res->flags) { - printk(KERN_ERR "PCI: ignoring extra I/O range" - " for bridge %s\n", node->full_name); - continue; - } - } else { - if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { - printk(KERN_ERR "PCI: too many memory ranges" - " for bridge %s\n", node->full_name); - continue; - } - res = bus->resource[i]; - ++i; - } - res->start = of_read_number(&ranges[1], 2); - res->end = res->start + size - 1; - res->flags = flags; - } - sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), - bus->number); - pr_debug(" bus name: %s\n", bus->name); - - mode = PCI_PROBE_NORMAL; - if (ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); - pr_debug(" probe mode: %d\n", mode); - - if (mode == PCI_PROBE_DEVTREE) - of_scan_bus(node, bus); - else if (mode == PCI_PROBE_NORMAL) - pci_scan_child_bus(bus); -} -EXPORT_SYMBOL(of_scan_pci_bridge); - void __devinit scan_phb(struct pci_controller *hose) { struct pci_bus *bus; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c new file mode 100644 index 000000000000..72c31bcb7aa4 --- /dev/null +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -0,0 +1,358 @@ +/* + * Helper routines to scan the device tree for PCI devices and busses + * + * Migrated out of PowerPC architecture pci_64.c file by Grant Likely + * so that these routines 
are available for + * 32 bit also. + * + * Copyright (C) 2003 Anton Blanchard , IBM + * Rework, based on alpha PCI code. + * Copyright (c) 2009 Secret Lab Technologies Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include + +/** + * get_int_prop - Decode a u32 from a device tree property + */ +static u32 get_int_prop(struct device_node *np, const char *name, u32 def) +{ + const u32 *prop; + int len; + + prop = of_get_property(np, name, &len); + if (prop && len >= 4) + return *prop; + return def; +} + +/** + * pci_parse_of_flags - Parse the flags cell of a device tree PCI address + * @addr0: value of 1st cell of a device tree PCI address. + * @bridge: Set this flag if the address is from a bridge 'ranges' property + */ +unsigned int pci_parse_of_flags(u32 addr0, int bridge) +{ + unsigned int flags = 0; + + if (addr0 & 0x02000000) { + flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; + flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; + flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; + if (addr0 & 0x40000000) + flags |= IORESOURCE_PREFETCH + | PCI_BASE_ADDRESS_MEM_PREFETCH; + /* Note: We don't know whether the ROM has been left enabled + * by the firmware or not. We mark it as disabled (ie, we do + * not set the IORESOURCE_ROM_ENABLE flag) for now rather than + * do a config space read, it will be force-enabled if needed + */ + if (!bridge && (addr0 & 0xff) == 0x30) + flags |= IORESOURCE_READONLY; + } else if (addr0 & 0x01000000) + flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; + if (flags) + flags |= IORESOURCE_SIZEALIGN; + return flags; +} + +/** + * of_pci_parse_addrs - Parse PCI addresses assigned in the device tree node + * @node: device tree node for the PCI device + * @dev: pci_dev structure for the device + * + * This function parses the 'assigned-addresses' property of a PCI devices' + * device tree node and writes them into the associated pci_dev structure. + */ +static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) +{ + u64 base, size; + unsigned int flags; + struct resource *res; + const u32 *addrs; + u32 i; + int proplen; + + addrs = of_get_property(node, "assigned-addresses", &proplen); + if (!addrs) + return; + pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs); + for (; proplen >= 20; proplen -= 20, addrs += 5) { + flags = pci_parse_of_flags(addrs[0], 0); + if (!flags) + continue; + base = of_read_number(&addrs[1], 2); + size = of_read_number(&addrs[3], 2); + if (!size) + continue; + i = addrs[0] & 0xff; + pr_debug(" base: %llx, size: %llx, i: %x\n", + (unsigned long long)base, + (unsigned long long)size, i); + + if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { + res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; + } else if (i == dev->rom_base_reg) { + res = &dev->resource[PCI_ROM_RESOURCE]; + flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; + } else { + printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); + continue; + } + res->start = base; + res->end = base + size - 1; + res->flags = flags; + res->name = pci_name(dev); + } +} + +/** + * of_create_pci_dev - Given a device tree node on a pci bus, create a pci_dev + * @node: device tree node pointer + * @bus: bus the device is sitting on + * @devfn: PCI function number, extracted from device tree by caller. 
+ */ +struct pci_dev *of_create_pci_dev(struct device_node *node, + struct pci_bus *bus, int devfn) +{ + struct pci_dev *dev; + const char *type; + + dev = alloc_pci_dev(); + if (!dev) + return NULL; + type = of_get_property(node, "device_type", NULL); + if (type == NULL) + type = ""; + + pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); + + dev->bus = bus; + dev->sysdata = node; + dev->dev.parent = bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->devfn = devfn; + dev->multifunction = 0; /* maybe a lie? */ + + dev->vendor = get_int_prop(node, "vendor-id", 0xffff); + dev->device = get_int_prop(node, "device-id", 0xffff); + dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); + dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); + + dev->cfg_size = pci_cfg_space_size(dev); + + dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus), + dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); + dev->class = get_int_prop(node, "class-code", 0); + dev->revision = get_int_prop(node, "revision-id", 0); + + pr_debug(" class: 0x%x\n", dev->class); + pr_debug(" revision: 0x%x\n", dev->revision); + + dev->current_state = 4; /* unknown power state */ + dev->error_state = pci_channel_io_normal; + dev->dma_mask = 0xffffffff; + + if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { + /* a PCI-PCI bridge */ + dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; + dev->rom_base_reg = PCI_ROM_ADDRESS1; + } else if (!strcmp(type, "cardbus")) { + dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; + } else { + dev->hdr_type = PCI_HEADER_TYPE_NORMAL; + dev->rom_base_reg = PCI_ROM_ADDRESS; + /* Maybe do a default OF mapping here */ + dev->irq = NO_IRQ; + } + + of_pci_parse_addrs(node, dev); + + pr_debug(" adding to system ...\n"); + + pci_device_add(dev, bus); + + return dev; +} +EXPORT_SYMBOL(of_create_pci_dev); + +/** + * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes + * @node: device tree node of bridge + * @dev: pci_dev structure for the bridge + * + * of_scan_bus() calls this routine for each PCI bridge that it finds, and + * this routine in turn call of_scan_bus() recusively to scan for more child + * devices. 
+ */ +void __devinit of_scan_pci_bridge(struct device_node *node, + struct pci_dev *dev) +{ + struct pci_bus *bus; + const u32 *busrange, *ranges; + int len, i, mode; + struct resource *res; + unsigned int flags; + u64 size; + + pr_debug("of_scan_pci_bridge(%s)\n", node->full_name); + + /* parse bus-range property */ + busrange = of_get_property(node, "bus-range", &len); + if (busrange == NULL || len != 8) { + printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", + node->full_name); + return; + } + ranges = of_get_property(node, "ranges", &len); + if (ranges == NULL) { + printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n", + node->full_name); + return; + } + + bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } + + bus->primary = dev->bus->number; + bus->subordinate = busrange[1]; + bus->bridge_ctl = 0; + bus->sysdata = node; + + /* parse ranges property */ + /* PCI #address-cells == 3 and #size-cells == 2 always */ + res = &dev->resource[PCI_BRIDGE_RESOURCES]; + for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) { + res->flags = 0; + bus->resource[i] = res; + ++res; + } + i = 1; + for (; len >= 32; len -= 32, ranges += 8) { + flags = pci_parse_of_flags(ranges[0], 1); + size = of_read_number(&ranges[6], 2); + if (flags == 0 || size == 0) + continue; + if (flags & IORESOURCE_IO) { + res = bus->resource[0]; + if (res->flags) { + printk(KERN_ERR "PCI: ignoring extra I/O range" + " for bridge %s\n", node->full_name); + continue; + } + } else { + if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { + printk(KERN_ERR "PCI: too many memory ranges" + " for bridge %s\n", node->full_name); + continue; + } + res = bus->resource[i]; + ++i; + } + res->start = of_read_number(&ranges[1], 2); + res->end = res->start + size - 1; + res->flags = flags; + } + sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), + bus->number); + pr_debug(" bus name: %s\n", bus->name); + + mode = PCI_PROBE_NORMAL; + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + pr_debug(" probe mode: %d\n", mode); + + if (mode == PCI_PROBE_DEVTREE) + of_scan_bus(node, bus); + else if (mode == PCI_PROBE_NORMAL) + pci_scan_child_bus(bus); +} +EXPORT_SYMBOL(of_scan_pci_bridge); + +/** + * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices + * @node: device tree node for the PCI bus + * @bus: pci_bus structure for the PCI bus + * @rescan_existing: Flag indicating bus has already been set up + */ +static void __devinit __of_scan_bus(struct device_node *node, + struct pci_bus *bus, int rescan_existing) +{ + struct device_node *child; + const u32 *reg; + int reglen, devfn; + struct pci_dev *dev; + + pr_debug("of_scan_bus(%s) bus no %d... \n", + node->full_name, bus->number); + + /* Scan direct children */ + for_each_child_of_node(node, child) { + pr_debug(" * %s\n", child->full_name); + reg = of_get_property(child, "reg", ®len); + if (reg == NULL || reglen < 20) + continue; + devfn = (reg[0] >> 8) & 0xff; + + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(child, bus, devfn); + if (!dev) + continue; + pr_debug(" dev header type: %x\n", dev->hdr_type); + } + + /* Apply all fixups necessary. 
We don't fixup the bus "self" + * for an existing bridge that is being rescanned + */ + if (!rescan_existing) + pcibios_setup_bus_self(bus); + pcibios_setup_bus_devices(bus); + + /* Now scan child busses */ + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { + struct device_node *child = pci_device_to_OF_node(dev); + if (dev) + of_scan_pci_bridge(child, dev); + } + } +} + +/** + * of_scan_bus - given a PCI bus node, setup bus and scan for child devices + * @node: device tree node for the PCI bus + * @bus: pci_bus structure for the PCI bus + */ +void __devinit of_scan_bus(struct device_node *node, + struct pci_bus *bus) +{ + __of_scan_bus(node, bus, 0); +} +EXPORT_SYMBOL_GPL(of_scan_bus); + +/** + * of_rescan_bus - given a PCI bus node, scan for child devices + * @node: device tree node for the PCI bus + * @bus: pci_bus structure for the PCI bus + * + * Same as of_scan_bus, but for a pci_bus structure that has already been + * setup. + */ +void __devinit of_rescan_bus(struct device_node *node, + struct pci_bus *bus) +{ + __of_scan_bus(node, bus, 1); +} +EXPORT_SYMBOL_GPL(of_rescan_bus); + -- cgit From 89c2dd62a389c5fed07c4b13c906c43214fc7491 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 25 Aug 2009 16:20:45 +0000 Subject: powerpc/pci: Pull ppc32 PCI features into common Some of the PCI features we have in ppc32 we will need on ppc64 platforms in the future. These include support for: * ppc_md.pci_exclude_device * indirect config cycles * early config cycles We also simplified the logic in fake_pci_bus() to assume it will always get a valid pci_controller. Since all current callers seem to pass it one. Signed-off-by: Kumar Gala Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 71 ++++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/pci_32.c | 71 ---------------------------------------- 2 files changed, 71 insertions(+), 71 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 725ea9144e38..8f84a9a8428e 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1617,3 +1617,74 @@ void __devinit pcibios_setup_phb_resources(struct pci_controller *hose) (unsigned long)hose->io_base_virt - _IO_BASE); } + +/* + * Null PCI config access functions, for the case when we can't + * find a hose. + */ +#define NULL_PCI_OP(rw, size, type) \ +static int \ +null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \ +{ \ + return PCIBIOS_DEVICE_NOT_FOUND; \ +} + +static int +null_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static int +null_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static struct pci_ops null_pci_ops = +{ + .read = null_read_config, + .write = null_write_config, +}; + +/* + * These functions are used early on before PCI scanning is done + * and all of the pci_dev and pci_bus structures have been created. + */ +static struct pci_bus * +fake_pci_bus(struct pci_controller *hose, int busnr) +{ + static struct pci_bus bus; + + if (hose == 0) { + printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr); + } + bus.number = busnr; + bus.sysdata = hose; + bus.ops = hose? 
hose->ops: &null_pci_ops; + return &bus; +} + +#define EARLY_PCI_OP(rw, size, type) \ +int early_##rw##_config_##size(struct pci_controller *hose, int bus, \ + int devfn, int offset, type value) \ +{ \ + return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \ + devfn, offset, value); \ +} + +EARLY_PCI_OP(read, byte, u8 *) +EARLY_PCI_OP(read, word, u16 *) +EARLY_PCI_OP(read, dword, u32 *) +EARLY_PCI_OP(write, byte, u8) +EARLY_PCI_OP(write, word, u16) +EARLY_PCI_OP(write, dword, u32) + +extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); +int early_find_capability(struct pci_controller *hose, int bus, int devfn, + int cap) +{ + return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); +} diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1e807fe7ad2c..8cf15d961c38 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -469,75 +469,4 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) return result; } -/* - * Null PCI config access functions, for the case when we can't - * find a hose. - */ -#define NULL_PCI_OP(rw, size, type) \ -static int \ -null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \ -{ \ - return PCIBIOS_DEVICE_NOT_FOUND; \ -} - -static int -null_read_config(struct pci_bus *bus, unsigned int devfn, int offset, - int len, u32 *val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; -} - -static int -null_write_config(struct pci_bus *bus, unsigned int devfn, int offset, - int len, u32 val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; -} - -static struct pci_ops null_pci_ops = -{ - .read = null_read_config, - .write = null_write_config, -}; -/* - * These functions are used early on before PCI scanning is done - * and all of the pci_dev and pci_bus structures have been created. - */ -static struct pci_bus * -fake_pci_bus(struct pci_controller *hose, int busnr) -{ - static struct pci_bus bus; - - if (hose == 0) { - hose = pci_bus_to_hose(busnr); - if (hose == 0) - printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr); - } - bus.number = busnr; - bus.sysdata = hose; - bus.ops = hose? hose->ops: &null_pci_ops; - return &bus; -} - -#define EARLY_PCI_OP(rw, size, type) \ -int early_##rw##_config_##size(struct pci_controller *hose, int bus, \ - int devfn, int offset, type value) \ -{ \ - return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \ - devfn, offset, value); \ -} - -EARLY_PCI_OP(read, byte, u8 *) -EARLY_PCI_OP(read, word, u16 *) -EARLY_PCI_OP(read, dword, u32 *) -EARLY_PCI_OP(write, byte, u8) -EARLY_PCI_OP(write, word, u16) -EARLY_PCI_OP(write, dword, u32) - -extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); -int early_find_capability(struct pci_controller *hose, int bus, int devfn, - int cap) -{ - return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); -} -- cgit From 77c0a700c1c292edafa11c1e52821ce4636f81b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Aug 2009 14:25:04 +1000 Subject: powerpc: Properly start decrementer on BookE secondary CPUs This moves the code to start the decrementer on 40x and BookE into a separate function which is now called from time_init() and secondary_time_init(), before the respective clock sources are registered. We also remove the 85xx specific code for doing it from the platform code. 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index edb1edb36469..a180b4f9a4f6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -727,6 +727,18 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } +/* should become __cpuinit when secondary_cpu_time_init also is */ +void start_cpu_decrementer(void) +{ +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */ +} + void __init generic_calibrate_decr(void) { ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */ @@ -746,14 +758,6 @@ void __init generic_calibrate_decr(void) printk(KERN_ERR "WARNING: Estimating processor frequency " "(not found)\n"); } - -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - /* Clear any pending timer interrupts */ - mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); - - /* Enable decrementer interrupt */ - mtspr(SPRN_TCR, TCR_DIE); -#endif } int update_persistent_clock(struct timespec now) @@ -914,6 +918,11 @@ static void __init init_decrementer_clockevent(void) void secondary_cpu_time_init(void) { + /* Start the decrementer on CPUs that have manual control + * such as BookE + */ + start_cpu_decrementer(); + /* FIME: Should make unrelatred change to move snapshot_timebase * call here ! */ register_decrementer_clockevent(smp_processor_id()); @@ -1017,6 +1026,11 @@ void __init time_init(void) write_sequnlock_irqrestore(&xtime_lock, flags); + /* Start the decrementer on CPUs that have manual control + * such as BookE + */ + start_cpu_decrementer(); + /* Register the clocksource, if we're not running on iSeries */ if (!firmware_has_feature(FW_FEATURE_ISERIES)) clocksource_init(); -- cgit From 0ed2c722c650513ba4bce868c7a052e576c060e2 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 28 Aug 2009 08:58:16 +0000 Subject: powerpc/pci: Merge ppc32 and ppc64 versions of phb_scan() The two versions are doing almost exactly the same thing. No need to maintain them as separate files. This patch also has the side effect of making the PCI device tree scanning code available to 32 bit powerpc machines, but no board ports actually make use of this feature at this point. 
Signed-off-by: Grant Likely Acked-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/of_platform.c | 2 +- arch/powerpc/kernel/pci-common.c | 49 +++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/pci_32.c | 25 ++------------------ arch/powerpc/kernel/pci_64.c | 46 +++++------------------------------- 4 files changed, 58 insertions(+), 64 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 87df428e3588..1a4fc0d11a03 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -276,7 +276,7 @@ static int __devinit of_pci_phb_probe(struct of_device *dev, #endif /* CONFIG_EEH */ /* Scan the bus */ - scan_phb(phb); + pcibios_scan_phb(phb, dev->node); if (phb->bus == NULL) return -ENXIO; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8f84a9a8428e..e9f4840096b3 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1688,3 +1688,52 @@ int early_find_capability(struct pci_controller *hose, int bus, int devfn, { return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); } + +/** + * pci_scan_phb - Given a pci_controller, setup and scan the PCI bus + * @hose: Pointer to the PCI host controller instance structure + * @sysdata: value to use for sysdata pointer. ppc32 and ppc64 differ here + * + * Note: the 'data' pointer is a temporary measure. As 32 and 64 bit + * pci code gets merged, this parameter should become unnecessary because + * both will use the same value. + */ +void __devinit pcibios_scan_phb(struct pci_controller *hose, void *sysdata) +{ + struct pci_bus *bus; + struct device_node *node = hose->dn; + int mode; + + pr_debug("PCI: Scanning PHB %s\n", + node ? node->full_name : ""); + + /* Create an empty bus for the toplevel */ + bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, + sysdata); + if (bus == NULL) { + pr_err("Failed to create bus for PCI domain %04x\n", + hose->global_number); + return; + } + bus->secondary = hose->first_busno; + hose->bus = bus; + + /* Get some IO space for the new PHB */ + pcibios_setup_phb_io_space(hose); + + /* Wire up PHB bus resources */ + pcibios_setup_phb_resources(hose); + + /* Get probe mode and perform scan */ + mode = PCI_PROBE_NORMAL; + if (node && ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + pr_debug(" probe mode: %d\n", mode); + if (mode == PCI_PROBE_DEVTREE) { + bus->subordinate = hose->last_busno; + of_scan_bus(node, bus); + } + + if (mode == PCI_PROBE_NORMAL) + hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); +} diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 8cf15d961c38..c13668cf36d9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -354,36 +354,15 @@ pci_create_OF_bus_map(void) } } -static void __devinit pcibios_scan_phb(struct pci_controller *hose) +void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose) { - struct pci_bus *bus; - struct device_node *node = hose->dn; unsigned long io_offset; struct resource *res = &hose->io_resource; - pr_debug("PCI: Scanning PHB %s\n", - node ? 
node->full_name : ""); - - /* Create an empty bus for the toplevel */ - bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, hose); - if (bus == NULL) { - printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", - hose->global_number); - return; - } - bus->secondary = hose->first_busno; - hose->bus = bus; - /* Fixup IO space offset */ io_offset = (unsigned long)hose->io_base_virt - isa_io_base; res->start = (res->start + io_offset) & 0xffffffffu; res->end = (res->end + io_offset) & 0xffffffffu; - - /* Wire up PHB bus resources */ - pcibios_setup_phb_resources(hose); - - /* Scan children */ - hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); } static int __init pcibios_init(void) @@ -401,7 +380,7 @@ static int __init pcibios_init(void) if (pci_assign_all_buses) hose->first_busno = next_busno; hose->last_busno = 0xff; - pcibios_scan_phb(hose); + pcibios_scan_phb(hose, hose); pci_bus_add_devices(hose->bus); if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 4d5b4ced7e45..ba949a2c93ac 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -43,45 +43,6 @@ unsigned long pci_probe_only = 1; unsigned long pci_io_base = ISA_IO_BASE; EXPORT_SYMBOL(pci_io_base); -void __devinit scan_phb(struct pci_controller *hose) -{ - struct pci_bus *bus; - struct device_node *node = hose->dn; - int mode; - - pr_debug("PCI: Scanning PHB %s\n", - node ? node->full_name : ""); - - /* Create an empty bus for the toplevel */ - bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, node); - if (bus == NULL) { - printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", - hose->global_number); - return; - } - bus->secondary = hose->first_busno; - hose->bus = bus; - - /* Get some IO space for the new PHB */ - pcibios_map_io_space(bus); - - /* Wire up PHB bus resources */ - pcibios_setup_phb_resources(hose); - - /* Get probe mode and perform scan */ - mode = PCI_PROBE_NORMAL; - if (node && ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); - pr_debug(" probe mode: %d\n", mode); - if (mode == PCI_PROBE_DEVTREE) { - bus->subordinate = hose->last_busno; - of_scan_bus(node, bus); - } - - if (mode == PCI_PROBE_NORMAL) - hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); -} - static int __init pcibios_init(void) { struct pci_controller *hose, *tmp; @@ -103,7 +64,7 @@ static int __init pcibios_init(void) /* Scan all of the recorded PCI controllers. */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - scan_phb(hose); + pcibios_scan_phb(hose, hose->dn); pci_bus_add_devices(hose->bus); } @@ -237,6 +198,11 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pcibios_map_io_space); +void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose) +{ + pcibios_map_io_space(hose->bus); +} + #define IOBASE_BRIDGE_NUMBER 0 #define IOBASE_MEMORY 1 #define IOBASE_IO 2 -- cgit From 46db2f86a3b2a94e0b33e0b4548fb7b7b6bdff66 Mon Sep 17 00:00:00 2001 From: Brian King Date: Fri, 28 Aug 2009 12:06:29 +0000 Subject: powerpc/pseries: Fix to handle slb resize across migration The SLB can change sizes across a live migration, which was not being handled, resulting in possible machine crashes during migration if migrating to a machine which has a smaller max SLB size than the source machine. 
Fix this by first reducing the SLB size to the minimum possible value, which is 32, prior to migration. Then during the device tree update which occurs after migration, we make the call to ensure the SLB gets updated. Also add the slb_size to the lparcfg output so that the migration tools can check to make sure the kernel has this capability before allowing migration in scenarios where the SLB size will change. BenH: Fixed #include -> to avoid breaking ppc32 build Signed-off-by: Brian King Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/lparcfg.c | 3 +++ arch/powerpc/kernel/rtas.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 2419cc706ff1..ed0ac4e4b8d8 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -35,6 +35,7 @@ #include #include #include +#include #define MODULE_VERS "1.8" #define MODULE_NAME "lparcfg" @@ -537,6 +538,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc); + seq_printf(m, "slb_size=%d\n", mmu_slb_size); + return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c434823b8c83..bf90361bb70f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -39,6 +39,7 @@ #include #include #include +#include struct rtas_t rtas = { .lock = __RAW_SPIN_LOCK_UNLOCKED @@ -713,6 +714,7 @@ static void rtas_percpu_suspend_me(void *info) { long rc = H_SUCCESS; unsigned long msr_save; + u16 slb_size = mmu_slb_size; int cpu; struct rtas_suspend_me_data *data = (struct rtas_suspend_me_data *)info; @@ -735,13 +737,16 @@ static void rtas_percpu_suspend_me(void *info) /* All other cpus are in H_JOIN, this cpu does * the suspend. */ + slb_set_size(SLB_MIN_SIZE); printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); data->error = rtas_call(data->token, 0, 1, NULL); - if (data->error) + if (data->error) { printk(KERN_DEBUG "ibm,suspend-me returned %d\n", data->error); + slb_set_size(slb_size); + } } else { printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", smp_processor_id(), rc); -- cgit From 76acc2c1a7a9a8c2cae7e9cf8d0a8b374a48aa94 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 1 Sep 2009 15:48:42 +0000 Subject: powerpc/fsl-booke: Use HW PTE format if CONFIG_PTE_64BIT Switch to using the Power ISA defined PTE format when we have a 64-bit PTE. This makes the code handling between fsl-booke and book3e-64 similar for TLB faults. Additionally this lets us take advantage of the page size encodings and full permissions that the HW PTE defines. Also defined _PMD_PRESENT, _PMD_PRESENT_MASK, and _PMD_BAD since the 32-bit ppc arch code expects them. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_fsl_booke.S | 36 +++++++++++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 2c5af5256479..975788ca05d2 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -575,7 +575,12 @@ interrupt_base: * place or can we save a couple of instructions here ?
*/ mfspr r12,SPRN_ESR +#ifdef CONFIG_PTE_64BIT + li r13,_PAGE_PRESENT + oris r13,r13,_PAGE_ACCESSED@h +#else li r13,_PAGE_PRESENT|_PAGE_ACCESSED +#endif rlwimi r13,r12,11,29,29 FIND_PTE @@ -643,7 +648,12 @@ interrupt_base: 4: /* Make up the required permissions */ +#ifdef CONFIG_PTE_64BIT + li r13,_PAGE_PRESENT | _PAGE_EXEC + oris r13,r13,_PAGE_ACCESSED@h +#else li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#endif FIND_PTE andc. r13,r13,r11 /* Check permission */ @@ -733,7 +743,7 @@ finish_tlb_load: mfspr r12, SPRN_MAS2 #ifdef CONFIG_PTE_64BIT - rlwimi r12, r11, 26, 24, 31 /* extract ...WIMGE from pte */ + rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ #else rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ #endif @@ -742,6 +752,20 @@ finish_tlb_load: #endif mtspr SPRN_MAS2, r12 +#ifdef CONFIG_PTE_64BIT + rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ + andi. r10, r11, _PAGE_DIRTY + bne 1f + li r10, MAS3_SW | MAS3_UW + andc r12, r12, r10 +1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ + rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ + mtspr SPRN_MAS3, r12 +BEGIN_MMU_FTR_SECTION + srwi r10, r13, 12 /* grab RPN[12:31] */ + mtspr SPRN_MAS7, r10 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) +#else li r10, (_PAGE_EXEC | _PAGE_PRESENT) rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 @@ -749,16 +773,6 @@ finish_tlb_load: slwi r10, r12, 1 or r10, r10, r12 iseleq r12, r12, r10 - -#ifdef CONFIG_PTE_64BIT - rlwimi r12, r13, 24, 0, 7 /* grab RPN[32:39] */ - rlwimi r12, r11, 24, 8, 19 /* grab RPN[40:51] */ - mtspr SPRN_MAS3, r12 -BEGIN_MMU_FTR_SECTION - srwi r10, r13, 8 /* grab RPN[8:31] */ - mtspr SPRN_MAS7, r10 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) -#else rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ mtspr SPRN_MAS3, r11 #endif -- cgit From 6e19314cc98ab9ccc22c30d1c414984ac6de5ce2 Mon Sep 17 00:00:00 2001 From: Mike Mason Date: Thu, 30 Jul 2009 15:42:39 -0700 Subject: PCI/powerpc: support PCIe fundamental reset By default, the EEH framework on powerpc does what's known as a "hot reset" during recovery of a PCI Express device. We've found a case where the device needs a "fundamental reset" to recover properly. The current PCI error recovery and EEH frameworks do not support this distinction. The attached patch makes changes to EEH to utilize the new bit field. Signed-off-by: Mike Mason Signed-off-by: Richard Lary Signed-off-by: Jesse Barnes --- arch/powerpc/kernel/pci_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 9e8902fa14c7..b6e9ea45a719 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -143,6 +143,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->dev.bus = &pci_bus_type; dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? 
*/ + dev->needs_freset = 0; /* pcie fundamental reset required */ dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); -- cgit From 757cbd46d11cfa7506b7dd5dd6657ae645bf6a17 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 8 Sep 2009 17:38:52 +0000 Subject: powerpc/85xx: Fix SMP compile error and allow NULL for smp_ops The following commit introduced a compile error since it removed the implementation of smp_85xx_basic_setup: commit 77c0a700c1c292edafa11c1e52821ce4636f81b0 Author: Benjamin Herrenschmidt Date: Fri Aug 28 14:25:04 2009 +1000 powerpc: Properly start decrementer on BookE secondary CPUs Make it so that smp_ops probe() and setup_cpu() can be set to NULL. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 96f107cc0160..d387b3937ccc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -269,7 +269,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_callin_map[boot_cpuid] = 1; if (smp_ops) - max_cpus = smp_ops->probe(); + if (smp_ops->probe) + max_cpus = smp_ops->probe(); + else + max_cpus = NR_CPUS; else max_cpus = 1; @@ -493,7 +496,8 @@ int __devinit start_secondary(void *unused) preempt_disable(); cpu_callin_map[cpu] = 1; - smp_ops->setup_cpu(cpu); + if (smp_ops->setup_cpu) + smp_ops->setup_cpu(cpu); if (smp_ops->take_timebase) smp_ops->take_timebase(); @@ -556,7 +560,7 @@ void __init smp_cpus_done(unsigned int max_cpus) old_mask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); - if (smp_ops) + if (smp_ops && smp_ops->setup_cpu) smp_ops->setup_cpu(boot_cpuid); set_cpus_allowed(current, old_mask); -- cgit From a6dbf93a2ad853585409e715eb96dca9177e3c39 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 9 Sep 2009 01:26:03 +0000 Subject: powerpc: Fix bug where perf_counters breaks oprofile Currently there is a bug where if you use oprofile on a pSeries machine, then use perf_counters, then use oprofile again, oprofile will not work correctly; it will lose the PMU configuration the next time the hypervisor does a partition context switch, and thereafter won't count anything. Maynard Johnson identified the sequence causing the problem: - oprofile setup calls ppc_enable_pmcs(), which calls pseries_lpar_enable_pmcs, which tells the hypervisor that we want to use the PMU, and sets the "PMU in use" flag in the lppaca. This flag tells the hypervisor whether it needs to save and restore the PMU config. - The perf_counter code sets and clears the "PMU in use" flag directly as it context-switches the PMU between tasks, and leaves it clear when it finishes. - oprofile setup, called for a new oprofile run, calls ppc_enable_pmcs, which does nothing because it has already been called. In particular it doesn't set the "PMU in use" flag. This fixes the problem by arranging for ppc_enable_pmcs to always set the "PMU in use" flag. It makes the perf_counter code call ppc_enable_pmcs also rather than calling the lower-level function directly, and removes the setting of the "PMU in use" flag from pseries_lpar_enable_pmcs, since that is now done in its caller. This also removes the declaration of pasemi_enable_pmcs because it isn't defined anywhere. 
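For reference, the helper both subsystems now go through ends up setting the same lppaca flag that the open-coded perf_counter helper (removed in the diff below) used to set; an assumed sketch of ppc_set_pmu_inuse(), mirroring that removed code rather than quoting the real header:

/* Assumed shape (pSeries case): marking the PMU as in use in the lppaca
 * tells the hypervisor to save and restore PMU state across partition
 * context switches. */
static inline void ppc_set_pmu_inuse(int inuse)
{
	get_lppaca()->pmcregs_in_use = inuse;
}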
Reported-by: Maynard Johnson Signed-off-by: Paul Mackerras Cc: --- arch/powerpc/kernel/perf_counter.c | 13 +++---------- arch/powerpc/kernel/sysfs.c | 3 +++ 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 70e1f57f7dd8..ccd6b2135642 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -62,7 +62,6 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { return 0; } -static inline void perf_set_pmu_inuse(int inuse) { } static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } static inline u32 perf_get_misc_flags(struct pt_regs *regs) { @@ -93,11 +92,6 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) return 0; } -static inline void perf_set_pmu_inuse(int inuse) -{ - get_lppaca()->pmcregs_in_use = inuse; -} - /* * The user wants a data address recorded. * If we're not doing instruction sampling, give them the SDAR @@ -531,8 +525,7 @@ void hw_perf_disable(void) * Check if we ever enabled the PMU on this cpu. */ if (!cpuhw->pmcs_enabled) { - if (ppc_md.enable_pmcs) - ppc_md.enable_pmcs(); + ppc_enable_pmcs(); cpuhw->pmcs_enabled = 1; } @@ -594,7 +587,7 @@ void hw_perf_enable(void) mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); if (cpuhw->n_counters == 0) - perf_set_pmu_inuse(0); + ppc_set_pmu_inuse(0); goto out_enable; } @@ -627,7 +620,7 @@ void hw_perf_enable(void) * bit set and set the hardware counters to their initial values. * Then unfreeze the counters. */ - perf_set_pmu_inuse(1); + ppc_set_pmu_inuse(1); mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f41aec85aa49..956ab33fd73f 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "cacheinfo.h" @@ -123,6 +124,8 @@ static DEFINE_PER_CPU(char, pmcs_enabled); void ppc_enable_pmcs(void) { + ppc_set_pmu_inuse(1); + /* Only need to enable them once */ if (__get_cpu_var(pmcs_enabled)) return; -- cgit From e51ee31e8af22948dcc3b115978469b09c96c3fd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 9 Sep 2009 20:28:49 +0000 Subject: powerpc/perf_counters: Reduce stack usage of power_check_constraints Michael Ellerman reported stack-frame size warnings being produced for power_check_constraints(), which uses an 8*8 array of u64 and two 8*8 arrays of unsigned long, which are currently allocated on the stack, along with some other smaller variables. These arrays come to 1.5kB on 64-bit or 1kB on 32-bit, which is a bit too much for the stack. This fixes the problem by putting these arrays in the existing per-cpu cpu_hw_counters struct. This is OK because two of the call sites have interrupts disabled already; for the third call site we use get_cpu_var, which disables preemption, so we know we won't get a context switch while we're in power_check_constraints(). Note that power_check_constraints() can be called during context switch but is not called from interrupts. 
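The preemption argument for the third call site is the standard get_cpu_var()/put_cpu_var() pattern; a minimal generic sketch (illustrative names only, not from the patch):

/* get_cpu_var() disables preemption while we use the per-CPU scratch
 * area, so it cannot be switched out from under us; put_cpu_var()
 * re-enables preemption. */
struct scratch_area {
	unsigned long vals[8];
};
static DEFINE_PER_CPU(struct scratch_area, scratch_area);

static unsigned long sum_scratch(void)
{
	struct scratch_area *s = &get_cpu_var(scratch_area);
	unsigned long sum = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(s->vals); i++)
		sum += s->vals[i];
	put_cpu_var(scratch_area);
	return sum;
}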
Reported-by: Michael Ellerman Signed-off-by: Paul Mackerras Cc: --- arch/powerpc/kernel/perf_counter.c | 55 ++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index ccd6b2135642..7ceefaf3a7f5 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -32,6 +32,9 @@ struct cpu_hw_counters { unsigned long mmcr[3]; struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; + u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; }; DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); @@ -239,13 +242,11 @@ static void write_pmc(int idx, unsigned long val) * and see if any combination of alternative codes is feasible. * The feasible set is returned in event[]. */ -static int power_check_constraints(u64 event[], unsigned int cflags[], +static int power_check_constraints(struct cpu_hw_counters *cpuhw, + u64 event[], unsigned int cflags[], int n_ev) { unsigned long mask, value, nv; - u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; int i, j; @@ -260,21 +261,23 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], if ((cflags[i] & PPMU_LIMITED_PMC_REQD) && !ppmu->limited_pmc_event(event[i])) { ppmu->get_alternatives(event[i], cflags[i], - alternatives[i]); - event[i] = alternatives[i][0]; + cpuhw->alternatives[i]); + event[i] = cpuhw->alternatives[i][0]; } - if (ppmu->get_constraint(event[i], &amasks[i][0], - &avalues[i][0])) + if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], + &cpuhw->avalues[i][0])) return -1; } value = mask = 0; for (i = 0; i < n_ev; ++i) { - nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); + nv = (value | cpuhw->avalues[i][0]) + + (value & cpuhw->avalues[i][0] & addf); if ((((nv + tadd) ^ value) & mask) != 0 || - (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) + (((nv + tadd) ^ cpuhw->avalues[i][0]) & + cpuhw->amasks[i][0]) != 0) break; value = nv; - mask |= amasks[i][0]; + mask |= cpuhw->amasks[i][0]; } if (i == n_ev) return 0; /* all OK */ @@ -285,10 +288,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], for (i = 0; i < n_ev; ++i) { choice[i] = 0; n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], - alternatives[i]); + cpuhw->alternatives[i]); for (j = 1; j < n_alt[i]; ++j) - ppmu->get_constraint(alternatives[i][j], - &amasks[i][j], &avalues[i][j]); + ppmu->get_constraint(cpuhw->alternatives[i][j], + &cpuhw->amasks[i][j], + &cpuhw->avalues[i][j]); } /* enumerate all possibilities and see if any will work */ @@ -307,11 +311,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], * where k > j, will satisfy the constraints. 
*/ while (++j < n_alt[i]) { - nv = (value | avalues[i][j]) + - (value & avalues[i][j] & addf); + nv = (value | cpuhw->avalues[i][j]) + + (value & cpuhw->avalues[i][j] & addf); if ((((nv + tadd) ^ value) & mask) == 0 && - (((nv + tadd) ^ avalues[i][j]) - & amasks[i][j]) == 0) + (((nv + tadd) ^ cpuhw->avalues[i][j]) + & cpuhw->amasks[i][j]) == 0) break; } if (j >= n_alt[i]) { @@ -333,7 +337,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], svalues[i] = value; smasks[i] = mask; value = nv; - mask |= amasks[i][j]; + mask |= cpuhw->amasks[i][j]; ++i; j = -1; } @@ -341,7 +345,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], /* OK, we have a feasible combination, tell the caller the solution */ for (i = 0; i < n_ev; ++i) - event[i] = alternatives[i][choice[i]]; + event[i] = cpuhw->alternatives[i][choice[i]]; return 0; } @@ -745,7 +749,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, return -EAGAIN; if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) return -EAGAIN; - i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); if (i < 0) return -EAGAIN; cpuhw->n_counters = n0 + n; @@ -800,7 +804,7 @@ static int power_pmu_enable(struct perf_counter *counter) cpuhw->flags[n0] = counter->hw.counter_base; if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) goto out; - if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) goto out; counter->hw.config = cpuhw->events[n0]; @@ -1005,6 +1009,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) unsigned int cflags[MAX_HWCOUNTERS]; int n; int err; + struct cpu_hw_counters *cpuhw; if (!ppmu) return ERR_PTR(-ENXIO); @@ -1083,7 +1088,11 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) return ERR_PTR(-EINVAL); - if (power_check_constraints(events, cflags, n + 1)) + + cpuhw = &get_cpu_var(cpu_hw_counters); + err = power_check_constraints(cpuhw, events, cflags, n + 1); + put_cpu_var(cpu_hw_counters); + if (err) return ERR_PTR(-EINVAL); counter->hw.config = events[n]; -- cgit From c6c9eacef09a94b5866b83556196440aca876702 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 8 Sep 2009 14:16:58 +0000 Subject: powerpc/booke: Don't set DABR on 64-bit BookE, use DAC1 instead Also remove a duplicate setting of it in the context switch path on BookE. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 678ff132e8b0..0a3216433051 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -284,14 +284,13 @@ int set_dabr(unsigned long dabr) return ppc_md.set_dabr(dabr); /* XXX should we have a CPU_FTR_HAS_DABR ? 
*/ -#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) - mtspr(SPRN_DABR, dabr); -#endif - #if defined(CONFIG_BOOKE) mtspr(SPRN_DAC1, dabr); +#elif defined(CONFIG_PPC_BOOK3S) + mtspr(SPRN_DABR, dabr); #endif + return 0; } @@ -372,15 +371,16 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ - if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) - set_dabr(new->thread.dabr); - #if defined(CONFIG_BOOKE) /* If new thread DAC (HW breakpoint) is the same then leave it */ if (new->thread.dabr) set_dabr(new->thread.dabr); +#else + if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) + set_dabr(new->thread.dabr); #endif + new_thread = &new->thread; old_thread = &current->thread; -- cgit From d331d8305cba713605854aab63a000fb892353a7 Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Thu, 13 Aug 2009 09:03:02 +0100 Subject: powerpc/nvram: Enable use Generic NVRAM driver for different size chips Remove the reliance on a statically defined NVRAM size, allowing platforms to support NVRAMs with sizes differing from the standard. A fall back value is provided for platforms not supporting this extension. Signed-off-by: Martyn Welch Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_32.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e1e3059cf34b..53bcf3d792db 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -210,6 +210,14 @@ void nvram_write_byte(unsigned char val, int addr) } EXPORT_SYMBOL(nvram_write_byte); +ssize_t nvram_get_size(void) +{ + if (ppc_md.nvram_size) + return ppc_md.nvram_size(); + return -1; +} +EXPORT_SYMBOL(nvram_get_size); + void nvram_sync(void) { if (ppc_md.nvram_sync) -- cgit From f86fd306605287d7c7f4f0f8e8e2a9d49d28b396 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 19 Sep 2009 10:14:33 +0200 Subject: kbuild: rename ld-option to cc-ldoption ld-option is misnamed as it tests options to gcc, not to ld. Renamed it to reflect this. Cc: Andi Kleen Cc: Roland McGrath Signed-off-by: Sam Ravnborg --- arch/powerpc/kernel/vdso32/Makefile | 2 +- arch/powerpc/kernel/vdso64/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index b54b81688132..51ead52141bd 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -16,7 +16,7 @@ GCOV_PROFILE := n EXTRA_CFLAGS := -shared -fno-common -fno-builtin EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) EXTRA_AFLAGS := -D__VDSO32__ -s obj-y += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dd0c8e936775..79da65d44a2a 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -11,7 +11,7 @@ GCOV_PROFILE := n EXTRA_CFLAGS := -shared -fno-common -fno-builtin EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) EXTRA_AFLAGS := -D__VDSO64__ -s obj-y += vdso64_wrapper.o -- cgit From d200c922bc2b1ac88b8d33b6cfff2ed837af186a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 20 Sep 2009 18:14:13 -0400 Subject: Use new __init_task_data macro in arch init_task.c files.
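For reference, the macro being adopted is a plain section annotation; to the best of my knowledge its definition in include/linux/init_task.h at this point in the series is the following, so the conversion below is purely mechanical:

/* Places the object in the .data.init_task section, same as the
 * open-coded attribute it replaces. */
#define __init_task_data \
	__attribute__((__section__(".data.init_task")))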
Signed-off-by: Joe Perches Signed-off-by: Tim Abbott Acked-by: Paul Mundt Signed-off-by: Sam Ravnborg --- arch/powerpc/kernel/init_task.c | 5 ++--- arch/powerpc/kernel/machine_kexec_64.c | 5 +++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index ffc4253fef55..2375b7eb1c76 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -16,9 +16,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 49e705fcee6d..040bd1de8d99 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -249,8 +250,8 @@ static void kexec_prepare_cpus(void) * We could use a smaller stack if we don't care about anything using * current, but that audit has not been performed. */ -static union thread_union kexec_stack - __attribute__((__section__(".data.init_task"))) = { }; +static union thread_union kexec_stack __init_task_data = + { }; /* Our assembly helper, in kexec_stub.S */ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, -- cgit From abe1ee3a221d53778c3e58747bbec6e518e5471b Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sun, 20 Sep 2009 18:14:15 -0400 Subject: Use macros for .data.page_aligned section. This patch changes the remaining direct references to .data.page_aligned in C and assembly code to use the macros in include/linux/linkage.h. Signed-off-by: Tim Abbott Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Haavard Skinnemoen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Signed-off-by: Sam Ravnborg --- arch/powerpc/kernel/vdso.c | 3 ++- arch/powerpc/kernel/vdso32/vdso32_wrapper.S | 3 ++- arch/powerpc/kernel/vdso64/vdso64_wrapper.S | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index a0abce251d0a..3faaf29bdb29 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -1,3 +1,4 @@ + /* * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. 
* @@ -74,7 +75,7 @@ static int vdso_ready; static union { struct vdso_data data; u8 page[PAGE_SIZE]; -} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +} vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = &vdso_data_store.data; /* Format of the patch table */ diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 556f0caa5d84..6e8f507ed32b 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -1,7 +1,8 @@ #include +#include #include - .section ".data.page_aligned" + __PAGE_ALIGNED_DATA .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index 0529cb9e3b97..b8553d62b792 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -1,7 +1,8 @@ #include +#include #include - .section ".data.page_aligned" + __PAGE_ALIGNED_DATA .globl vdso64_start, vdso64_end .balign PAGE_SIZE -- cgit From cd74c86bdf705f824d494a2bbda393d1d562b40a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 21 Sep 2009 16:44:32 +1000 Subject: perf_counter, powerpc, sparc: Fix compilation after perf_counter_overflow() change Commit 5622f295 ("x86, perf_counter, bts: Optimize BTS overflow handling") removed the regs field from struct perf_sample_data and added a regs parameter to perf_counter_overflow(). This breaks the build on powerpc (and Sparc) as reported by Sachin Sant: arch/powerpc/kernel/perf_counter.c: In function 'record_and_restart': arch/powerpc/kernel/perf_counter.c:1165: error: unknown field 'regs' specified in initializer This adjusts arch/powerpc/kernel/perf_counter.c to correspond with the new struct perf_sample_data and perf_counter_overflow(). [ v2: also fix Sparc, Markus Metzger ] Reported-by: Sachin Sant Signed-off-by: Paul Mackerras Cc: Markus Metzger Cc: David S. Miller Cc: benh@kernel.crashing.org Cc: linuxppc-dev@ozlabs.org Cc: Peter Zijlstra LKML-Reference: <19127.8400.376239.586120@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 7ceefaf3a7f5..5ccf9bca96c0 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -1162,7 +1162,6 @@ static void record_and_restart(struct perf_counter *counter, unsigned long val, */ if (record) { struct perf_sample_data data = { - .regs = regs, .addr = 0, .period = counter->hw.last_period, }; @@ -1170,7 +1169,7 @@ static void record_and_restart(struct perf_counter *counter, unsigned long val, if (counter->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_counter_overflow(counter, nmi, &data)) { + if (perf_counter_overflow(counter, nmi, &data, regs)) { /* * Interrupts are coming too fast - throttle them * by setting the counter to 0, so it will be -- cgit From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:02:48 +0200 Subject: perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! 
In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian Acked-by: Peter Zijlstra Acked-by: Paul Mackerras Reviewed-by: Arjan van de Ven Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Benjamin Herrenschmidt Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 8 +- arch/powerpc/kernel/irq.c | 8 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/perf_callchain.c | 2 +- arch/powerpc/kernel/perf_counter.c | 1315 ---------------------------------- arch/powerpc/kernel/perf_event.c | 1315 ++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/powerpc/kernel/time.c | 30 +- 15 files changed, 1348 insertions(+), 1348 deletions(-) delete mode 100644 arch/powerpc/kernel/perf_counter.c create mode 100644 arch/powerpc/kernel/perf_event.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 569f79ccd310..b23664a0b86c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f0df285f0f87..0812b0f414bb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); + DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 66bcda34a6bb..900e0eea0099 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,14 +556,14 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_COUNTERS - /* check paca->perf_counter_pending if we're enabling ints */ +#ifdef CONFIG_PERF_EVENTS + /* check paca->perf_event_pending if we're enabling ints */ lbz r3,PACAPERFPEND(r13) and. 
r3,r3,r5 beq 27f - bl .perf_counter_do_pending + bl .perf_event_do_pending 27: -#endif /* CONFIG_PERF_COUNTERS */ +#endif /* CONFIG_PERF_EVENTS */ /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f7f376ea7b17..e5d121177984 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -138,9 +138,9 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } /* diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index cc466d039af6..09d72028f317 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index f74b62c67511..0a03cf70d247 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -10,7 +10,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c deleted file mode 100644 index 5ccf9bca96c0..000000000000 --- a/arch/powerpc/kernel/perf_counter.c +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Performance counter support - powerpc architecture code - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct cpu_hw_counters { - int n_counters; - int n_percpu; - int disabled; - int n_added; - int n_limited; - u8 pmcs_enabled; - struct perf_counter *counter[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int flags[MAX_HWCOUNTERS]; - unsigned long mmcr[3]; - struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; - u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; - u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); - -struct power_pmu *ppmu; - -/* - * Normally, to ignore kernel events we set the FCS (freeze counters - * in supervisor mode) bit in MMCR0, but if the kernel runs with the - * hypervisor bit set in the MSR, or if we are running on a processor - * where the hypervisor bit is forced to 1 (as on Apple G5 processors), - * then we need to use the FCHV bit to ignore kernel events. - */ -static unsigned int freeze_counters_kernel = MMCR0_FCS; - -/* - * 32-bit doesn't have MMCRA but does have an MMCR2, - * and a few other names are different. 
- */ -#ifdef CONFIG_PPC32 - -#define MMCR0_FCHV 0 -#define MMCR0_PMCjCE MMCR0_PMCnCE - -#define SPRN_MMCRA SPRN_MMCR2 -#define MMCRA_SAMPLE_ENABLE 0 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_read_regs(struct pt_regs *regs) { } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} - -#endif /* CONFIG_PPC32 */ - -/* - * Things that are specific to 64-bit implementations. - */ -#ifdef CONFIG_PPC64 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - return 4 * (slot - 1); - } - return 0; -} - -/* - * The user wants a data address recorded. - * If we're not doing instruction sampling, give them the SDAR - * (sampled data address). If we are doing instruction sampling, then - * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC - * bit in MMCRA. - */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) -{ - unsigned long mmcra = regs->dsisr; - unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - *addrp = mfspr(SPRN_SDAR); -} - -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if (TRAP(regs) != 0xf00) - return 0; /* not a PMU interrupt */ - - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? - PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Overload regs->dsisr to store MMCRA so we only need to read it once - * on each interrupt. - */ -static inline void perf_read_regs(struct pt_regs *regs) -{ - regs->dsisr = mfspr(SPRN_MMCRA); -} - -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return !regs->softe; -} - -#endif /* CONFIG_PPC64 */ - -static void perf_counter_interrupt(struct pt_regs *regs); - -void perf_counter_print_debug(void) -{ -} - -/* - * Read one performance monitor counter (PMC). - */ -static unsigned long read_pmc(int idx) -{ - unsigned long val; - - switch (idx) { - case 1: - val = mfspr(SPRN_PMC1); - break; - case 2: - val = mfspr(SPRN_PMC2); - break; - case 3: - val = mfspr(SPRN_PMC3); - break; - case 4: - val = mfspr(SPRN_PMC4); - break; - case 5: - val = mfspr(SPRN_PMC5); - break; - case 6: - val = mfspr(SPRN_PMC6); - break; -#ifdef CONFIG_PPC64 - case 7: - val = mfspr(SPRN_PMC7); - break; - case 8: - val = mfspr(SPRN_PMC8); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to read PMC%d\n", idx); - val = 0; - } - return val; -} - -/* - * Write one PMC. 
- */ -static void write_pmc(int idx, unsigned long val) -{ - switch (idx) { - case 1: - mtspr(SPRN_PMC1, val); - break; - case 2: - mtspr(SPRN_PMC2, val); - break; - case 3: - mtspr(SPRN_PMC3, val); - break; - case 4: - mtspr(SPRN_PMC4, val); - break; - case 5: - mtspr(SPRN_PMC5, val); - break; - case 6: - mtspr(SPRN_PMC6, val); - break; -#ifdef CONFIG_PPC64 - case 7: - mtspr(SPRN_PMC7, val); - break; - case 8: - mtspr(SPRN_PMC8, val); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to write PMC%d\n", idx); - } -} - -/* - * Check if a set of events can all go on the PMU at once. - * If they can't, this will look at alternative codes for the events - * and see if any combination of alternative codes is feasible. - * The feasible set is returned in event[]. - */ -static int power_check_constraints(struct cpu_hw_counters *cpuhw, - u64 event[], unsigned int cflags[], - int n_ev) -{ - unsigned long mask, value, nv; - unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; - int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; - int i, j; - unsigned long addf = ppmu->add_fields; - unsigned long tadd = ppmu->test_adder; - - if (n_ev > ppmu->n_counter) - return -1; - - /* First see if the events will go on as-is */ - for (i = 0; i < n_ev; ++i) { - if ((cflags[i] & PPMU_LIMITED_PMC_REQD) - && !ppmu->limited_pmc_event(event[i])) { - ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - event[i] = cpuhw->alternatives[i][0]; - } - if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) - return -1; - } - value = mask = 0; - for (i = 0; i < n_ev; ++i) { - nv = (value | cpuhw->avalues[i][0]) + - (value & cpuhw->avalues[i][0] & addf); - if ((((nv + tadd) ^ value) & mask) != 0 || - (((nv + tadd) ^ cpuhw->avalues[i][0]) & - cpuhw->amasks[i][0]) != 0) - break; - value = nv; - mask |= cpuhw->amasks[i][0]; - } - if (i == n_ev) - return 0; /* all OK */ - - /* doesn't work, gather alternatives... */ - if (!ppmu->get_alternatives) - return -1; - for (i = 0; i < n_ev; ++i) { - choice[i] = 0; - n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - for (j = 1; j < n_alt[i]; ++j) - ppmu->get_constraint(cpuhw->alternatives[i][j], - &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); - } - - /* enumerate all possibilities and see if any will work */ - i = 0; - j = -1; - value = mask = nv = 0; - while (i < n_ev) { - if (j >= 0) { - /* we're backtracking, restore context */ - value = svalues[i]; - mask = smasks[i]; - j = choice[i]; - } - /* - * See if any alternative k for event i, - * where k > j, will satisfy the constraints. - */ - while (++j < n_alt[i]) { - nv = (value | cpuhw->avalues[i][j]) + - (value & cpuhw->avalues[i][j] & addf); - if ((((nv + tadd) ^ value) & mask) == 0 && - (((nv + tadd) ^ cpuhw->avalues[i][j]) - & cpuhw->amasks[i][j]) == 0) - break; - } - if (j >= n_alt[i]) { - /* - * No feasible alternative, backtrack - * to event i-1 and continue enumerating its - * alternatives from where we got up to. - */ - if (--i < 0) - return -1; - } else { - /* - * Found a feasible alternative for event i, - * remember where we got up to with this event, - * go on to the next event, and start with - * the first alternative for it. 
- */ - choice[i] = j; - svalues[i] = value; - smasks[i] = mask; - value = nv; - mask |= cpuhw->amasks[i][j]; - ++i; - j = -1; - } - } - - /* OK, we have a feasible combination, tell the caller the solution */ - for (i = 0; i < n_ev; ++i) - event[i] = cpuhw->alternatives[i][choice[i]]; - return 0; -} - -/* - * Check if newly-added counters have consistent settings for - * exclude_{user,kernel,hv} with each other and any previously - * added counters. - */ -static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], - int n_prev, int n_new) -{ - int eu = 0, ek = 0, eh = 0; - int i, n, first; - struct perf_counter *counter; - - n = n_prev + n_new; - if (n <= 1) - return 0; - - first = 1; - for (i = 0; i < n; ++i) { - if (cflags[i] & PPMU_LIMITED_PMC_OK) { - cflags[i] &= ~PPMU_LIMITED_PMC_REQD; - continue; - } - counter = ctrs[i]; - if (first) { - eu = counter->attr.exclude_user; - ek = counter->attr.exclude_kernel; - eh = counter->attr.exclude_hv; - first = 0; - } else if (counter->attr.exclude_user != eu || - counter->attr.exclude_kernel != ek || - counter->attr.exclude_hv != eh) { - return -EAGAIN; - } - } - - if (eu || ek || eh) - for (i = 0; i < n; ++i) - if (cflags[i] & PPMU_LIMITED_PMC_OK) - cflags[i] |= PPMU_LIMITED_PMC_REQD; - - return 0; -} - -static void power_pmu_read(struct perf_counter *counter) -{ - s64 val, delta, prev; - - if (!counter->hw.idx) - return; - /* - * Performance monitor interrupts come even when interrupts - * are soft-disabled, as long as interrupts are hard-enabled. - * Therefore we treat them like NMIs. - */ - do { - prev = atomic64_read(&counter->hw.prev_count); - barrier(); - val = read_pmc(counter->hw.idx); - } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); - - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &counter->hw.period_left); -} - -/* - * On some machines, PMC5 and PMC6 can't be written, don't respect - * the freeze conditions, and don't generate interrupts. This tells - * us if `counter' is using such a PMC. - */ -static int is_limited_pmc(int pmcnum) -{ - return (ppmu->flags & PPMU_LIMITED_PMC5_6) - && (pmcnum == 5 || pmcnum == 6); -} - -static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val, prev, delta; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - if (!counter->hw.idx) - continue; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&counter->hw.prev_count); - counter->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - } -} - -static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - counter->hw.idx = cpuhw->limited_hwidx[i]; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&counter->hw.prev_count, val); - perf_counter_update_userpage(counter); - } -} - -/* - * Since limited counters don't respect the freeze conditions, we - * have to read them immediately after freezing or unfreezing the - * other counters. 
We try to keep the values from the limited - * counters as consistent as possible by keeping the delay (in - * cycles and instructions) between freezing/unfreezing and reading - * the limited counters as small and consistent as possible. - * Therefore, if any limited counters are in use, we read them - * both, and always in the same order, to minimize variability, - * and do it inside the same asm that writes MMCR0. - */ -static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) -{ - unsigned long pmc5, pmc6; - - if (!cpuhw->n_limited) { - mtspr(SPRN_MMCR0, mmcr0); - return; - } - - /* - * Write MMCR0, then read PMC5 and PMC6 immediately. - * To ensure we don't get a performance monitor interrupt - * between writing MMCR0 and freezing/thawing the limited - * counters, we first write MMCR0 with the counter overflow - * interrupt enable bits turned off. - */ - asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" - : "=&r" (pmc5), "=&r" (pmc6) - : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), - "i" (SPRN_MMCR0), - "i" (SPRN_PMC5), "i" (SPRN_PMC6)); - - if (mmcr0 & MMCR0_FC) - freeze_limited_counters(cpuhw, pmc5, pmc6); - else - thaw_limited_counters(cpuhw, pmc5, pmc6); - - /* - * Write the full MMCR0 including the counter overflow interrupt - * enable bits, if necessary. - */ - if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) - mtspr(SPRN_MMCR0, mmcr0); -} - -/* - * Disable all counters to prevent PMU interrupts and to allow - * counters to be added or removed. - */ -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - - if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - - /* - * Check if we ever enabled the PMU on this cpu. - */ - if (!cpuhw->pmcs_enabled) { - ppc_enable_pmcs(); - cpuhw->pmcs_enabled = 1; - } - - /* - * Disable instruction sampling if it was enabled - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mb(); - } - - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the counters - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); - } - local_irq_restore(flags); -} - -/* - * Re-enable all counters if disable == 0. - * If we were previously disabled and counters were added, then - * put the new config on the PMU. - */ -void hw_perf_enable(void) -{ - struct perf_counter *counter; - struct cpu_hw_counters *cpuhw; - unsigned long flags; - long i; - unsigned long val; - s64 left; - unsigned int hwc_index[MAX_HWCOUNTERS]; - int n_lim; - int idx; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } - cpuhw->disabled = 0; - - /* - * If we didn't change anything, or only removed counters, - * no need to recalculate MMCR* settings and reset the PMCs. - * Just reenable the PMU with the current MMCR* settings - * (possibly updated for removal of counters). 
- */ - if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_counters == 0) - ppc_set_pmu_inuse(0); - goto out_enable; - } - - /* - * Compute MMCR* values for the new set of counters - */ - if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, - cpuhw->mmcr)) { - /* shouldn't ever get here */ - printk(KERN_ERR "oops compute_mmcr failed\n"); - goto out; - } - - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first counter. - * We have already checked that all counters have the - * same values for these bits as the first counter. - */ - counter = cpuhw->counter[0]; - if (counter->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; - - /* - * Write the new configuration to MMCR* with the freeze - * bit set and set the hardware counters to their initial values. - * Then unfreeze the counters. - */ - ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) - | MMCR0_FC); - - /* - * Read off any pre-existing counters that need to move - * to another PMC. - */ - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_pmu_read(counter); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - } - - /* - * Initialize the PMCs for all the new and moved counters. - */ - cpuhw->n_limited = n_lim = 0; - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx) - continue; - idx = hwc_index[i] + 1; - if (is_limited_pmc(idx)) { - cpuhw->limited_counter[n_lim] = counter; - cpuhw->limited_hwidx[n_lim] = idx; - ++n_lim; - continue; - } - val = 0; - if (counter->hw.sample_period) { - left = atomic64_read(&counter->hw.period_left); - if (left < 0x80000000L) - val = 0x80000000L - left; - } - atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = idx; - write_pmc(idx, val); - perf_counter_update_userpage(counter); - } - cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; - - out_enable: - mb(); - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - /* - * Enable instruction sampling if necessary - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); - } - - out: - local_irq_restore(flags); -} - -static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], u64 *events, - unsigned int *flags) -{ - int n = 0; - struct perf_counter *counter; - - if (!is_software_counter(group)) { - if (n >= max_count) - return -1; - ctrs[n] = group; - flags[n] = group->hw.counter_base; - events[n++] = group->hw.config; - } - list_for_each_entry(counter, &group->sibling_list, list_entry) { - if (!is_software_counter(counter) && - counter->state != PERF_COUNTER_STATE_OFF) { - if (n >= max_count) - return -1; - ctrs[n] = counter; - flags[n] = counter->hw.counter_base; - events[n++] = counter->hw.config; - } - } - return n; -} - -static void counter_sched_in(struct perf_counter *counter, int cpu) -{ - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; - if (is_software_counter(counter)) - counter->pmu->enable(counter); -} - 
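The PMC arming done in hw_perf_enable() above (and repeated in record_and_restart() further down) depends on the PMCs being 32 bits wide and on the PMU raising an interrupt when a counter's sign bit becomes set. A minimal sketch of that arithmetic, for illustration only and not part of the patch:

	/* Preload a PMC so it overflows after 'left' more counts. */
	static unsigned long arm_pmc(s64 left)
	{
		unsigned long val = 0;

		/*
		 * 0x80000000 - left flips the sign bit after 'left'
		 * counts; with 2^31 or more counts still to go, start
		 * from 0 and re-arm at the next overflow instead.
		 */
		if (left < 0x80000000L)
			val = 0x80000000L - left;
		return val;		/* programmed via write_pmc(idx, val) */
	}

	/* Overflow test used by the interrupt handler: sign bit set. */
	static inline int pmc_overflowed(unsigned long val)
	{
		return (int)val < 0;
	}

The same 32-bit width is why deltas are computed as (val - prev) & 0xfffffffful throughout this file.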
-/* - * Called to enable a whole group of counters. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - struct cpu_hw_counters *cpuhw; - long i, n, n0; - struct perf_counter *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_counters = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update counter states etc., - * and enable any software counters - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->counter[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - counter_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { - if (sub->state != PERF_COUNTER_STATE_OFF) { - counter_sched_in(sub, cpu); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - -/* - * Add a counter to the PMU. - * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_enable to do the - * actual work of reconfiguring the PMU. - */ -static int power_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - int n0; - int ret = -EAGAIN; - - local_irq_save(flags); - perf_disable(); - - /* - * Add the counter to the list (if there is room) - * and check whether the total set is still feasible. - */ - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - if (n0 >= ppmu->n_counter) - goto out; - cpuhw->counter[n0] = counter; - cpuhw->events[n0] = counter->hw.config; - cpuhw->flags[n0] = counter->hw.counter_base; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) - goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) - goto out; - - counter->hw.config = cpuhw->events[n0]; - ++cpuhw->n_counters; - ++cpuhw->n_added; - - ret = 0; - out: - perf_enable(); - local_irq_restore(flags); - return ret; -} - -/* - * Remove a counter from the PMU. 
- */ -static void power_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - long i; - unsigned long flags; - - local_irq_save(flags); - perf_disable(); - - power_pmu_read(counter); - - cpuhw = &__get_cpu_var(cpu_hw_counters); - for (i = 0; i < cpuhw->n_counters; ++i) { - if (counter == cpuhw->counter[i]) { - while (++i < cpuhw->n_counters) - cpuhw->counter[i-1] = cpuhw->counter[i]; - --cpuhw->n_counters; - ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - if (counter->hw.idx) { - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - perf_counter_update_userpage(counter); - break; - } - } - for (i = 0; i < cpuhw->n_limited; ++i) - if (counter == cpuhw->limited_counter[i]) - break; - if (i < cpuhw->n_limited) { - while (++i < cpuhw->n_limited) { - cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; - cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; - } - --cpuhw->n_limited; - } - if (cpuhw->n_counters == 0) { - /* disable exceptions if no counters are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); - } - - perf_enable(); - local_irq_restore(flags); -} - -/* - * Re-enable interrupts on a counter after they were throttled - * because they were coming too fast. - */ -static void power_pmu_unthrottle(struct perf_counter *counter) -{ - s64 val, left; - unsigned long flags; - - if (!counter->hw.idx || !counter->hw.sample_period) - return; - local_irq_save(flags); - perf_disable(); - power_pmu_read(counter); - left = counter->hw.sample_period; - counter->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); - perf_enable(); - local_irq_restore(flags); -} - -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, -}; - -/* - * Return 1 if we might be able to put counter on a limited PMC, - * or 0 if not. - * A counter can only go on a limited PMC if it counts something - * that a limited PMC can count, doesn't require interrupts, and - * doesn't exclude any processor mode. - */ -static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, - unsigned int flags) -{ - int n; - u64 alt[MAX_EVENT_ALTERNATIVES]; - - if (counter->attr.exclude_user - || counter->attr.exclude_kernel - || counter->attr.exclude_hv - || counter->attr.sample_period) - return 0; - - if (ppmu->limited_pmc_event(ev)) - return 1; - - /* - * The requested event isn't on a limited PMC already; - * see if any alternative code goes on a limited PMC. - */ - if (!ppmu->get_alternatives) - return 0; - - flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; - n = ppmu->get_alternatives(ev, flags, alt); - - return n > 0; -} - -/* - * Find an alternative event that goes on a normal PMC, if possible, - * and return the event code, or 0 if there is no such alternative. - * (Note: event code 0 is "don't count" on all machines.) 
- */ -static u64 normal_pmc_alternative(u64 ev, unsigned long flags) -{ - u64 alt[MAX_EVENT_ALTERNATIVES]; - int n; - - flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); - n = ppmu->get_alternatives(ev, flags, alt); - if (!n) - return 0; - return alt[0]; -} - -/* Number of perf_counters counting hardware events */ -static atomic_t num_counters; -/* Used to avoid races in calling reserve/release_pmc_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - -/* - * Release the PMU if this is the last perf_counter. - */ -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (!atomic_add_unless(&num_counters, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_counters) == 0) - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -/* - * Translate a generic cache event config to a raw event code. - */ -static int hw_perf_cache_event(u64 config, u64 *eventp) -{ - unsigned long type, op, result; - int ev; - - if (!ppmu->cache_events) - return -EINVAL; - - /* unpack config */ - type = config & 0xff; - op = (config >> 8) & 0xff; - result = (config >> 16) & 0xff; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ev = (*ppmu->cache_events)[type][op][result]; - if (ev == 0) - return -EOPNOTSUPP; - if (ev == -1) - return -EINVAL; - *eventp = ev; - return 0; -} - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - u64 ev; - unsigned long flags; - struct perf_counter *ctrs[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int cflags[MAX_HWCOUNTERS]; - int n; - int err; - struct cpu_hw_counters *cpuhw; - - if (!ppmu) - return ERR_PTR(-ENXIO); - switch (counter->attr.type) { - case PERF_TYPE_HARDWARE: - ev = counter->attr.config; - if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); - ev = ppmu->generic_events[ev]; - break; - case PERF_TYPE_HW_CACHE: - err = hw_perf_cache_event(counter->attr.config, &ev); - if (err) - return ERR_PTR(err); - break; - case PERF_TYPE_RAW: - ev = counter->attr.config; - break; - default: - return ERR_PTR(-EINVAL); - } - counter->hw.config_base = ev; - counter->hw.idx = 0; - - /* - * If we are not running on a hypervisor, force the - * exclude_hv bit to 0 so that we don't care what - * the user set it to. - */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->attr.exclude_hv = 0; - - /* - * If this is a per-task counter, then we can use - * PM_RUN_* events interchangeably with their non RUN_* - * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. - * XXX we should check if the task is an idle task. - */ - flags = 0; - if (counter->ctx->task) - flags |= PPMU_ONLY_COUNT_RUN; - - /* - * If this machine has limited counters, check whether this - * event could go on a limited counter. - */ - if (ppmu->flags & PPMU_LIMITED_PMC5_6) { - if (can_go_on_limited_pmc(counter, ev, flags)) { - flags |= PPMU_LIMITED_PMC_OK; - } else if (ppmu->limited_pmc_event(ev)) { - /* - * The requested event is on a limited PMC, - * but we can't use a limited PMC; see if any - * alternative goes on a normal PMC. - */ - ev = normal_pmc_alternative(ev, flags); - if (!ev) - return ERR_PTR(-EINVAL); - } - } - - /* - * If this is in a group, check if it can go on with all the - * other hardware counters in the group. We assume the counter - * hasn't been linked into its leader's sibling list at this point. 
- */ - n = 0; - if (counter->group_leader != counter) { - n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events, cflags); - if (n < 0) - return ERR_PTR(-EINVAL); - } - events[n] = ev; - ctrs[n] = counter; - cflags[n] = flags; - if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); - - cpuhw = &get_cpu_var(cpu_hw_counters); - err = power_check_constraints(cpuhw, events, cflags, n + 1); - put_cpu_var(cpu_hw_counters); - if (err) - return ERR_PTR(-EINVAL); - - counter->hw.config = events[n]; - counter->hw.counter_base = cflags[n]; - counter->hw.last_period = counter->hw.sample_period; - atomic64_set(&counter->hw.period_left, counter->hw.last_period); - - /* - * See if we need to reserve the PMU. - * If no counters are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing - * reserve_pmc_hardware or release_pmc_hardware. - */ - err = 0; - if (!atomic_inc_not_zero(&num_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_counters) == 0 && - reserve_pmc_hardware(perf_counter_interrupt)) - err = -EBUSY; - else - atomic_inc(&num_counters); - mutex_unlock(&pmc_reserve_mutex); - } - counter->destroy = hw_perf_counter_destroy; - - if (err) - return ERR_PTR(err); - return &power_pmu; -} - -/* - * A counter has overflowed; update its count and record - * things if requested. Note that interrupts are hard-disabled - * here so there is no possibility of being interrupted. - */ -static void record_and_restart(struct perf_counter *counter, unsigned long val, - struct pt_regs *regs, int nmi) -{ - u64 period = counter->hw.sample_period; - s64 prev, delta, left; - int record = 0; - - /* we don't have to worry about interrupts here */ - prev = atomic64_read(&counter->hw.prev_count); - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - - /* - * See if the total period for this counter has expired, - * and update for the next period. - */ - val = 0; - left = atomic64_read(&counter->hw.period_left) - delta; - if (period) { - if (left <= 0) { - left += period; - if (left <= 0) - left = period; - record = 1; - } - if (left < 0x80000000LL) - val = 0x80000000LL - left; - } - - /* - * Finally record data if requested. - */ - if (record) { - struct perf_sample_data data = { - .addr = 0, - .period = counter->hw.last_period, - }; - - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) - perf_get_data_addr(regs, &data.addr); - - if (perf_counter_overflow(counter, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the counter to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each counter counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } - } - - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); -} - -/* - * Called from generic code to get the misc flags (i.e. processor mode) - * for an event. - */ -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - u32 flags = perf_get_misc_flags(regs); - - if (flags) - return flags; - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Called from generic code to get the instruction pointer - * for an event. 
- */ -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - unsigned long ip; - - if (TRAP(regs) != 0xf00) - return regs->nip; /* not a PMU interrupt */ - - ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); - return ip; -} - -/* - * Performance monitor interrupt stuff - */ -static void perf_counter_interrupt(struct pt_regs *regs) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - unsigned long val; - int found = 0; - int nmi; - - if (cpuhw->n_limited) - freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), - mfspr(SPRN_PMC6)); - - perf_read_regs(regs); - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) - continue; - val = read_pmc(counter->hw.idx); - if ((int)val < 0) { - /* counter has overflowed */ - found = 1; - record_and_restart(counter, val, regs, nmi); - } - } - - /* - * In case we didn't find and reset the counter that caused - * the interrupt, scan all counters and reset any that are - * negative, to avoid getting continual interrupts. - * Any that we processed in the previous loop will not be negative. - */ - if (!found) { - for (i = 0; i < ppmu->n_counter; ++i) { - if (is_limited_pmc(i + 1)) - continue; - val = read_pmc(i + 1); - if ((int)val < 0) - write_pmc(i + 1, 0); - } - } - - /* - * Reset MMCR0 to its normal value. This will set PMXE and - * clear FC (freeze counters) and PMAO (perf mon alert occurred) - * and thus allow interrupts to occur again. - * XXX might want to use MSR.PM to keep the counters frozen until - * we get back out of this interrupt. - */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - if (nmi) - nmi_exit(); - else - irq_exit(); -} - -void hw_perf_counter_setup(int cpu) -{ - struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); - - if (!ppmu) - return; - memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; -} - -int register_power_pmu(struct power_pmu *pmu) -{ - if (ppmu) - return -EBUSY; /* something's already registered */ - - ppmu = pmu; - pr_info("%s performance monitor hardware support registered\n", - pmu->name); - -#ifdef MSR_HV - /* - * Use FCHV to ignore kernel events if MSR.HV is set. - */ - if (mfmsr() & MSR_HV) - freeze_counters_kernel = MMCR0_FCHV; -#endif /* CONFIG_PPC64 */ - - return 0; -} diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c new file mode 100644 index 000000000000..c98321fcb459 --- /dev/null +++ b/arch/powerpc/kernel/perf_event.c @@ -0,0 +1,1315 @@ +/* + * Performance event support - powerpc architecture code + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cpu_hw_events { + int n_events; + int n_percpu; + int disabled; + int n_added; + int n_limited; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int flags[MAX_HWEVENTS]; + unsigned long mmcr[3]; + struct perf_event *limited_event[MAX_LIMITED_HWEVENTS]; + u8 limited_hwidx[MAX_LIMITED_HWEVENTS]; + u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +struct power_pmu *ppmu; + +/* + * Normally, to ignore kernel events we set the FCS (freeze events + * in supervisor mode) bit in MMCR0, but if the kernel runs with the + * hypervisor bit set in the MSR, or if we are running on a processor + * where the hypervisor bit is forced to 1 (as on Apple G5 processors), + * then we need to use the FCHV bit to ignore kernel events. + */ +static unsigned int freeze_events_kernel = MMCR0_FCS; + +/* + * 32-bit doesn't have MMCRA but does have an MMCR2, + * and a few other names are different. + */ +#ifdef CONFIG_PPC32 + +#define MMCR0_FCHV 0 +#define MMCR0_PMCjCE MMCR0_PMCnCE + +#define SPRN_MMCRA SPRN_MMCR2 +#define MMCRA_SAMPLE_ENABLE 0 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_read_regs(struct pt_regs *regs) { } +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return 0; +} + +#endif /* CONFIG_PPC32 */ + +/* + * Things that are specific to 64-bit implementations. + */ +#ifdef CONFIG_PPC64 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + return 4 * (slot - 1); + } + return 0; +} + +/* + * The user wants a data address recorded. + * If we're not doing instruction sampling, give them the SDAR + * (sampled data address). If we are doing instruction sampling, then + * only give them the SDAR if it corresponds to the instruction + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC + * bit in MMCRA. + */ +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +{ + unsigned long mmcra = regs->dsisr; + unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + *addrp = mfspr(SPRN_SDAR); +} + +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if (TRAP(regs) != 0xf00) + return 0; /* not a PMU interrupt */ + + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? + PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Overload regs->dsisr to store MMCRA so we only need to read it once + * on each interrupt. 
+ */ +static inline void perf_read_regs(struct pt_regs *regs) +{ + regs->dsisr = mfspr(SPRN_MMCRA); +} + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return !regs->softe; +} + +#endif /* CONFIG_PPC64 */ + +static void perf_event_interrupt(struct pt_regs *regs); + +void perf_event_print_debug(void) +{ +} + +/* + * Read one performance monitor event (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 1: + val = mfspr(SPRN_PMC1); + break; + case 2: + val = mfspr(SPRN_PMC2); + break; + case 3: + val = mfspr(SPRN_PMC3); + break; + case 4: + val = mfspr(SPRN_PMC4); + break; + case 5: + val = mfspr(SPRN_PMC5); + break; + case 6: + val = mfspr(SPRN_PMC6); + break; +#ifdef CONFIG_PPC64 + case 7: + val = mfspr(SPRN_PMC7); + break; + case 8: + val = mfspr(SPRN_PMC8); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 1: + mtspr(SPRN_PMC1, val); + break; + case 2: + mtspr(SPRN_PMC2, val); + break; + case 3: + mtspr(SPRN_PMC3, val); + break; + case 4: + mtspr(SPRN_PMC4, val); + break; + case 5: + mtspr(SPRN_PMC5, val); + break; + case 6: + mtspr(SPRN_PMC6, val); + break; +#ifdef CONFIG_PPC64 + case 7: + mtspr(SPRN_PMC7, val); + break; + case 8: + mtspr(SPRN_PMC8, val); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } +} + +/* + * Check if a set of events can all go on the PMU at once. + * If they can't, this will look at alternative codes for the events + * and see if any combination of alternative codes is feasible. + * The feasible set is returned in event_id[]. + */ +static int power_check_constraints(struct cpu_hw_events *cpuhw, + u64 event_id[], unsigned int cflags[], + int n_ev) +{ + unsigned long mask, value, nv; + unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; + int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; + int i, j; + unsigned long addf = ppmu->add_fields; + unsigned long tadd = ppmu->test_adder; + + if (n_ev > ppmu->n_event) + return -1; + + /* First see if the events will go on as-is */ + for (i = 0; i < n_ev; ++i) { + if ((cflags[i] & PPMU_LIMITED_PMC_REQD) + && !ppmu->limited_pmc_event(event_id[i])) { + ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + event_id[i] = cpuhw->alternatives[i][0]; + } + if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], + &cpuhw->avalues[i][0])) + return -1; + } + value = mask = 0; + for (i = 0; i < n_ev; ++i) { + nv = (value | cpuhw->avalues[i][0]) + + (value & cpuhw->avalues[i][0] & addf); + if ((((nv + tadd) ^ value) & mask) != 0 || + (((nv + tadd) ^ cpuhw->avalues[i][0]) & + cpuhw->amasks[i][0]) != 0) + break; + value = nv; + mask |= cpuhw->amasks[i][0]; + } + if (i == n_ev) + return 0; /* all OK */ + + /* doesn't work, gather alternatives... 
*/ + if (!ppmu->get_alternatives) + return -1; + for (i = 0; i < n_ev; ++i) { + choice[i] = 0; + n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + for (j = 1; j < n_alt[i]; ++j) + ppmu->get_constraint(cpuhw->alternatives[i][j], + &cpuhw->amasks[i][j], + &cpuhw->avalues[i][j]); + } + + /* enumerate all possibilities and see if any will work */ + i = 0; + j = -1; + value = mask = nv = 0; + while (i < n_ev) { + if (j >= 0) { + /* we're backtracking, restore context */ + value = svalues[i]; + mask = smasks[i]; + j = choice[i]; + } + /* + * See if any alternative k for event_id i, + * where k > j, will satisfy the constraints. + */ + while (++j < n_alt[i]) { + nv = (value | cpuhw->avalues[i][j]) + + (value & cpuhw->avalues[i][j] & addf); + if ((((nv + tadd) ^ value) & mask) == 0 && + (((nv + tadd) ^ cpuhw->avalues[i][j]) + & cpuhw->amasks[i][j]) == 0) + break; + } + if (j >= n_alt[i]) { + /* + * No feasible alternative, backtrack + * to event_id i-1 and continue enumerating its + * alternatives from where we got up to. + */ + if (--i < 0) + return -1; + } else { + /* + * Found a feasible alternative for event_id i, + * remember where we got up to with this event_id, + * go on to the next event_id, and start with + * the first alternative for it. + */ + choice[i] = j; + svalues[i] = value; + smasks[i] = mask; + value = nv; + mask |= cpuhw->amasks[i][j]; + ++i; + j = -1; + } + } + + /* OK, we have a feasible combination, tell the caller the solution */ + for (i = 0; i < n_ev; ++i) + event_id[i] = cpuhw->alternatives[i][choice[i]]; + return 0; +} + +/* + * Check if newly-added events have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added events. + */ +static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], + int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + int i, n, first; + struct perf_event *event; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; ++i) { + if (cflags[i] & PPMU_LIMITED_PMC_OK) { + cflags[i] &= ~PPMU_LIMITED_PMC_REQD; + continue; + } + event = ctrs[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + if (eu || ek || eh) + for (i = 0; i < n; ++i) + if (cflags[i] & PPMU_LIMITED_PMC_OK) + cflags[i] |= PPMU_LIMITED_PMC_REQD; + + return 0; +} + +static void power_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + if (!event->hw.idx) + return; + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The events are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * On some machines, PMC5 and PMC6 can't be written, don't respect + * the freeze conditions, and don't generate interrupts. This tells + * us if `event' is using such a PMC. 
+ */ +static int is_limited_pmc(int pmcnum) +{ + return (ppmu->flags & PPMU_LIMITED_PMC5_6) + && (pmcnum == 5 || pmcnum == 6); +} + +static void freeze_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val, prev, delta; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + if (!event->hw.idx) + continue; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + prev = atomic64_read(&event->hw.prev_count); + event->hw.idx = 0; + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + } +} + +static void thaw_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + event->hw.idx = cpuhw->limited_hwidx[i]; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + atomic64_set(&event->hw.prev_count, val); + perf_event_update_userpage(event); + } +} + +/* + * Since limited events don't respect the freeze conditions, we + * have to read them immediately after freezing or unfreezing the + * other events. We try to keep the values from the limited + * events as consistent as possible by keeping the delay (in + * cycles and instructions) between freezing/unfreezing and reading + * the limited events as small and consistent as possible. + * Therefore, if any limited events are in use, we read them + * both, and always in the same order, to minimize variability, + * and do it inside the same asm that writes MMCR0. + */ +static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) +{ + unsigned long pmc5, pmc6; + + if (!cpuhw->n_limited) { + mtspr(SPRN_MMCR0, mmcr0); + return; + } + + /* + * Write MMCR0, then read PMC5 and PMC6 immediately. + * To ensure we don't get a performance monitor interrupt + * between writing MMCR0 and freezing/thawing the limited + * events, we first write MMCR0 with the event overflow + * interrupt enable bits turned off. + */ + asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" + : "=&r" (pmc5), "=&r" (pmc6) + : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), + "i" (SPRN_MMCR0), + "i" (SPRN_PMC5), "i" (SPRN_PMC6)); + + if (mmcr0 & MMCR0_FC) + freeze_limited_events(cpuhw, pmc5, pmc6); + else + thaw_limited_events(cpuhw, pmc5, pmc6); + + /* + * Write the full MMCR0 including the event overflow interrupt + * enable bits, if necessary. + */ + if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCR0, mmcr0); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + /* + * Disable instruction sampling if it was enabled + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mtspr(SPRN_MMCRA, + cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mb(); + } + + /* + * Set the 'freeze events' bit. + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events + * before we return. + */ + write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. 
+ * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct perf_event *event; + struct cpu_hw_events *cpuhw; + unsigned long flags; + long i; + unsigned long val; + s64 left; + unsigned int hwc_index[MAX_HWEVENTS]; + int n_lim; + int idx; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) { + local_irq_restore(flags); + return; + } + cpuhw->disabled = 0; + + /* + * If we didn't change anything, or only removed events, + * no need to recalculate MMCR* settings and reset the PMCs. + * Just reenable the PMU with the current MMCR* settings + * (possibly updated for removal of events). + */ + if (!cpuhw->n_added) { + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + if (cpuhw->n_events == 0) + ppc_set_pmu_inuse(0); + goto out_enable; + } + + /* + * Compute MMCR* values for the new set of events + */ + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, + cpuhw->mmcr)) { + /* shouldn't ever get here */ + printk(KERN_ERR "oops compute_mmcr failed\n"); + goto out; + } + + /* + * Add in MMCR0 freeze bits corresponding to the + * attr.exclude_* bits for the first event. + * We have already checked that all events have the + * same values for these bits as the first event. + */ + event = cpuhw->event[0]; + if (event->attr.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (event->attr.exclude_kernel) + cpuhw->mmcr[0] |= freeze_events_kernel; + if (event->attr.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + + /* + * Write the new configuration to MMCR* with the freeze + * bit set and set the hardware events to their initial values. + * Then unfreeze the events. + */ + ppc_set_pmu_inuse(1); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + | MMCR0_FC); + + /* + * Read off any pre-existing events that need to move + * to another PMC. + */ + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { + power_pmu_read(event); + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + } + + /* + * Initialize the PMCs for all the new and moved events. 
+ */ + cpuhw->n_limited = n_lim = 0; + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx) + continue; + idx = hwc_index[i] + 1; + if (is_limited_pmc(idx)) { + cpuhw->limited_event[n_lim] = event; + cpuhw->limited_hwidx[n_lim] = idx; + ++n_lim; + continue; + } + val = 0; + if (event->hw.sample_period) { + left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + event->hw.idx = idx; + write_pmc(idx, val); + perf_event_update_userpage(event); + } + cpuhw->n_limited = n_lim; + cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + + out_enable: + mb(); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + /* + * Enable instruction sampling if necessary + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mb(); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[], u64 *events, + unsigned int *flags) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + flags[n] = group->hw.event_base; + events[n++] = group->hw.config; + } + list_for_each_entry(event, &group->sibling_list, list_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + flags[n] = event->hw.event_base; + events[n++] = event->hw.config; + } + } + return n; +} + +static void event_sched_in(struct perf_event *event, int cpu) +{ + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; + event->tstamp_running += event->ctx->time - event->tstamp_stopped; + if (is_software_event(event)) + event->pmu->enable(event); +} + +/* + * Called to enable a whole group of events. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + */ +int hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + struct cpu_hw_events *cpuhw; + long i, n, n0; + struct perf_event *sub; + + if (!ppmu) + return 0; + cpuhw = &__get_cpu_var(cpu_hw_events); + n0 = cpuhw->n_events; + n = collect_events(group_leader, ppmu->n_event - n0, + &cpuhw->event[n0], &cpuhw->events[n0], + &cpuhw->flags[n0]); + if (n < 0) + return -EAGAIN; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) + return -EAGAIN; + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); + if (i < 0) + return -EAGAIN; + cpuhw->n_events = n0 + n; + cpuhw->n_added += n; + + /* + * OK, this group can go on; update event states etc., + * and enable any software events + */ + for (i = n0; i < n0 + n; ++i) + cpuhw->event[i]->hw.config = cpuhw->events[i]; + cpuctx->active_oncpu += n; + n = 1; + event_sched_in(group_leader, cpu); + list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { + if (sub->state != PERF_EVENT_STATE_OFF) { + event_sched_in(sub, cpu); + ++n; + } + } + ctx->nr_active += n; + + return 1; +} + +/* + * Add a event to the PMU. + * If all events are not already frozen, then we disable and + * re-enable the PMU in order to get hw_perf_enable to do the + * actual work of reconfiguring the PMU. 
+ */ +static int power_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + int n0; + int ret = -EAGAIN; + + local_irq_save(flags); + perf_disable(); + + /* + * Add the event to the list (if there is room) + * and check whether the total set is still feasible. + */ + cpuhw = &__get_cpu_var(cpu_hw_events); + n0 = cpuhw->n_events; + if (n0 >= ppmu->n_event) + goto out; + cpuhw->event[n0] = event; + cpuhw->events[n0] = event->hw.config; + cpuhw->flags[n0] = event->hw.event_base; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) + goto out; + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) + goto out; + + event->hw.config = cpuhw->events[n0]; + ++cpuhw->n_events; + ++cpuhw->n_added; + + ret = 0; + out: + perf_enable(); + local_irq_restore(flags); + return ret; +} + +/* + * Remove a event from the PMU. + */ +static void power_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + long i; + unsigned long flags; + + local_irq_save(flags); + perf_disable(); + + power_pmu_read(event); + + cpuhw = &__get_cpu_var(cpu_hw_events); + for (i = 0; i < cpuhw->n_events; ++i) { + if (event == cpuhw->event[i]) { + while (++i < cpuhw->n_events) + cpuhw->event[i-1] = cpuhw->event[i]; + --cpuhw->n_events; + ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + if (event->hw.idx) { + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + perf_event_update_userpage(event); + break; + } + } + for (i = 0; i < cpuhw->n_limited; ++i) + if (event == cpuhw->limited_event[i]) + break; + if (i < cpuhw->n_limited) { + while (++i < cpuhw->n_limited) { + cpuhw->limited_event[i-1] = cpuhw->limited_event[i]; + cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; + } + --cpuhw->n_limited; + } + if (cpuhw->n_events == 0) { + /* disable exceptions if no events are running */ + cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + } + + perf_enable(); + local_irq_restore(flags); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + */ +static void power_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + power_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +struct pmu power_pmu = { + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, +}; + +/* + * Return 1 if we might be able to put event on a limited PMC, + * or 0 if not. + * A event can only go on a limited PMC if it counts something + * that a limited PMC can count, doesn't require interrupts, and + * doesn't exclude any processor mode. + */ +static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, + unsigned int flags) +{ + int n; + u64 alt[MAX_EVENT_ALTERNATIVES]; + + if (event->attr.exclude_user + || event->attr.exclude_kernel + || event->attr.exclude_hv + || event->attr.sample_period) + return 0; + + if (ppmu->limited_pmc_event(ev)) + return 1; + + /* + * The requested event_id isn't on a limited PMC already; + * see if any alternative code goes on a limited PMC. 
+ */ + if (!ppmu->get_alternatives) + return 0; + + flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; + n = ppmu->get_alternatives(ev, flags, alt); + + return n > 0; +} + +/* + * Find an alternative event_id that goes on a normal PMC, if possible, + * and return the event_id code, or 0 if there is no such alternative. + * (Note: event_id code 0 is "don't count" on all machines.) + */ +static u64 normal_pmc_alternative(u64 ev, unsigned long flags) +{ + u64 alt[MAX_EVENT_ALTERNATIVES]; + int n; + + flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); + n = ppmu->get_alternatives(ev, flags, alt); + if (!n) + return 0; + return alt[0]; +} + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + unsigned long flags; + struct perf_event *ctrs[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int cflags[MAX_HWEVENTS]; + int n; + int err; + struct cpu_hw_events *cpuhw; + + if (!ppmu) + return ERR_PTR(-ENXIO); + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + default: + return ERR_PTR(-EINVAL); + } + event->hw.config_base = ev; + event->hw.idx = 0; + + /* + * If we are not running on a hypervisor, force the + * exclude_hv bit to 0 so that we don't care what + * the user set it to. + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + event->attr.exclude_hv = 0; + + /* + * If this is a per-task event, then we can use + * PM_RUN_* events interchangeably with their non RUN_* + * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. + * XXX we should check if the task is an idle task. + */ + flags = 0; + if (event->ctx->task) + flags |= PPMU_ONLY_COUNT_RUN; + + /* + * If this machine has limited events, check whether this + * event_id could go on a limited event. + */ + if (ppmu->flags & PPMU_LIMITED_PMC5_6) { + if (can_go_on_limited_pmc(event, ev, flags)) { + flags |= PPMU_LIMITED_PMC_OK; + } else if (ppmu->limited_pmc_event(ev)) { + /* + * The requested event_id is on a limited PMC, + * but we can't use a limited PMC; see if any + * alternative goes on a normal PMC. 
+ */ + ev = normal_pmc_alternative(ev, flags); + if (!ev) + return ERR_PTR(-EINVAL); + } + } + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, ppmu->n_event - 1, + ctrs, events, cflags); + if (n < 0) + return ERR_PTR(-EINVAL); + } + events[n] = ev; + ctrs[n] = event; + cflags[n] = flags; + if (check_excludes(ctrs, cflags, n, 1)) + return ERR_PTR(-EINVAL); + + cpuhw = &get_cpu_var(cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); + put_cpu_var(cpu_hw_events); + if (err) + return ERR_PTR(-EINVAL); + + event->hw.config = events[n]; + event->hw.event_base = cflags[n]; + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &power_pmu; +} + +/* + * A event has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .addr = 0, + .period = event->hw.last_period, + }; + + if (event->attr.sample_type & PERF_SAMPLE_ADDR) + perf_get_data_addr(regs, &data.addr); + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +/* + * Called from generic code to get the misc flags (i.e. processor mode) + * for an event_id. + */ +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + u32 flags = perf_get_misc_flags(regs); + + if (flags) + return flags; + return user_mode(regs) ? 
PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Called from generic code to get the instruction pointer + * for an event_id. + */ +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long ip; + + if (TRAP(regs) != 0xf00) + return regs->nip; /* not a PMU interrupt */ + + ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + return ip; +} + +/* + * Performance monitor interrupt stuff + */ +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + if (cpuhw->n_limited) + freeze_limited_events(cpuhw, mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6)); + + perf_read_regs(regs); + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (!event->hw.idx || is_limited_pmc(event->hw.idx)) + continue; + val = read_pmc(event->hw.idx); + if ((int)val < 0) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } + } + + /* + * In case we didn't find and reset the event that caused + * the interrupt, scan all events and reset any that are + * negative, to avoid getting continual interrupts. + * Any that we processed in the previous loop will not be negative. + */ + if (!found) { + for (i = 0; i < ppmu->n_event; ++i) { + if (is_limited_pmc(i + 1)) + continue; + val = read_pmc(i + 1); + if ((int)val < 0) + write_pmc(i + 1, 0); + } + } + + /* + * Reset MMCR0 to its normal value. This will set PMXE and + * clear FC (freeze events) and PMAO (perf mon alert occurred) + * and thus allow interrupts to occur again. + * XXX might want to use MSR.PM to keep the events frozen until + * we get back out of this interrupt. + */ + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + if (!ppmu) + return; + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; +} + +int register_power_pmu(struct power_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + +#ifdef MSR_HV + /* + * Use FCHV to ignore kernel events if MSR.HV is set. + */ + if (mfmsr() & MSR_HV) + freeze_events_kernel = MMCR0_FCHV; +#endif /* CONFIG_PPC64 */ + + return 0; +} diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 3c90a3d9173e..2a361cdda635 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 31918af3e355..0f4c1c73a6ad 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 867f6f663963..c351b3a57fbb 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. 
*/ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index fa21890531da..ca399ba5034c 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 018d094d92f9..28a4daacdc02 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 75dccb71a043..479574413a93 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 465e498bcb33..df45a7449a66 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -527,25 +527,25 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_counter_pending); +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_event_pending); -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { - get_cpu_var(perf_counter_pending) = 1; + get_cpu_var(perf_event_pending) = 1; set_dec(1); - put_cpu_var(perf_counter_pending); + put_cpu_var(perf_event_pending); } -#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) -#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 +#define test_perf_event_pending() __get_cpu_var(perf_event_pending) +#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ -#define test_perf_counter_pending() 0 -#define clear_perf_counter_pending() +#define test_perf_event_pending() 0 +#define clear_perf_event_pending() -#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ /* * For iSeries shared processors, we have to let the hypervisor @@ -573,9 +573,9 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); -- cgit From 57c0c15b5244320065374ad2c54f4fbec77a6428 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:20:38 +0200 Subject: perf: Tidy up after the big rename - provide compatibility Kconfig entry for existing PERF_COUNTERS .config's - provide courtesy copy of old perf_counter.h, for user-space projects - small indentation fixups - fix up MAINTAINERS - fix small x86 printout fallout - fix up small PowerPC comment fallout (use 'counter' as in register) Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker LKML-Reference: 
Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c98321fcb459..197b7d958796 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); struct power_pmu *ppmu; /* - * Normally, to ignore kernel events we set the FCS (freeze events + * Normally, to ignore kernel events we set the FCS (freeze counters * in supervisor mode) bit in MMCR0, but if the kernel runs with the * hypervisor bit set in the MSR, or if we are running on a processor * where the hypervisor bit is forced to 1 (as on Apple G5 processors), @@ -159,7 +159,7 @@ void perf_event_print_debug(void) } /* - * Read one performance monitor event (PMC). + * Read one performance monitor counter (PMC). */ static unsigned long read_pmc(int idx) { @@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event) val = read_pmc(event->hw.idx); } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); - /* The events are only 32 bits wide */ + /* The counters are only 32 bits wide */ delta = (val - prev) & 0xfffffffful; atomic64_add(delta, &event->count); atomic64_sub(delta, &event->hw.period_left); @@ -543,7 +543,7 @@ void hw_perf_disable(void) } /* - * Set the 'freeze events' bit. + * Set the 'freeze counters' bit. * The barrier is to make sure the mtspr has been * executed and the PMU has frozen the events * before we return. @@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } /* - * A event has overflowed; update its count and record + * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled * here so there is no possibility of being interrupted. */ @@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs) /* * Reset MMCR0 to its normal value. This will set PMXE and - * clear FC (freeze events) and PMAO (perf mon alert occurred) + * clear FC (freeze counters) and PMAO (perf mon alert occurred) * and thus allow interrupts to occur again. * XXX might want to use MSR.PM to keep the events frozen until * we get back out of this interrupt. 
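As a side note on the code above: hw_perf_cache_event() decodes the generic cache-event encoding of the perf ABI, in which the config word packs the cache type, the operation and the result into its low three bytes. A hedged example of what such a config looks like from the requesting side (the enum names come from the generic perf ABI, not from this patch):

	/* L1 data-cache read misses: type | (op << 8) | (result << 16) */
	u64 config = PERF_COUNT_HW_CACHE_L1D |
		     (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		     (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

hw_perf_cache_event() then indexes (*ppmu->cache_events)[type][op][result] to obtain the machine-specific raw event code, returning -EOPNOTSUPP for a 0 entry and -EINVAL for a -1 entry.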
-- cgit From 411c94038594b2a3fd123d09bdec3fe2500e383d Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Tue, 7 Jul 2009 15:24:23 +0530 Subject: trivial: fix typo "for for" in multiple files trivial: fix typo "for for" in multiple files Signed-off-by: Anand Gadiyar Signed-off-by: Jiri Kosina --- arch/powerpc/kernel/udbg_16550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index acb74a17bbbf..b4b167b33643 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -1,5 +1,5 @@ /* - * udbg for for NS16550 compatable serial ports + * udbg for NS16550 compatable serial ports * * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp * -- cgit From a8f90e906783f1f815120eefe813b23cb396e9bd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 22 Sep 2009 09:48:08 +1000 Subject: perf_event, powerpc: Fix compilation after big perf_counter rename This fixes two places in the powerpc perf_event (perf_counter) code where 'list_entry' needs to be changed to 'group_entry', but were missed in commit 65abc865 ("perf_counter: Rename list_entry -> group_entry, counter_list -> group_list"). This also changes 'event' back to 'counter' in a couple of contexts: * Field and function names that deal with the limited-function counters: it's really the hardware counters whose function is limited, not the events that they count. Hence: MAX_LIMITED_HWEVENTS -> MAX_LIMITED_HWCOUNTERS limited_event -> limited_counter freeze/thaw_limited_events -> freeze/thaw_limited_counters * The machine-specific PMU description struct (struct power_pmu): this renames 'n_event' back to 'n_counter' since it really describes how many hardware counters the machine has. (Renaming this back avoids a compile error in each of the machine-specific PMU back-ends where they initialize their power_pmu struct.) 
Signed-off-by: Paul Mackerras Cc: linuxppc-dev@ozlabs.org Cc: Peter Zijlstra LKML-Reference: <19128.4280.813369.589704@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 197b7d958796..bbcbae183e92 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -30,8 +30,8 @@ struct cpu_hw_events { u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; unsigned long mmcr[3]; - struct perf_event *limited_event[MAX_LIMITED_HWEVENTS]; - u8 limited_hwidx[MAX_LIMITED_HWEVENTS]; + struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; + u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -253,7 +253,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, unsigned long addf = ppmu->add_fields; unsigned long tadd = ppmu->test_adder; - if (n_ev > ppmu->n_event) + if (n_ev > ppmu->n_counter) return -1; /* First see if the events will go on as-is */ @@ -426,7 +426,7 @@ static int is_limited_pmc(int pmcnum) && (pmcnum == 5 || pmcnum == 6); } -static void freeze_limited_events(struct cpu_hw_events *cpuhw, +static void freeze_limited_counters(struct cpu_hw_events *cpuhw, unsigned long pmc5, unsigned long pmc6) { struct perf_event *event; @@ -434,7 +434,7 @@ static void freeze_limited_events(struct cpu_hw_events *cpuhw, int i; for (i = 0; i < cpuhw->n_limited; ++i) { - event = cpuhw->limited_event[i]; + event = cpuhw->limited_counter[i]; if (!event->hw.idx) continue; val = (event->hw.idx == 5) ? pmc5 : pmc6; @@ -445,7 +445,7 @@ static void freeze_limited_events(struct cpu_hw_events *cpuhw, } } -static void thaw_limited_events(struct cpu_hw_events *cpuhw, +static void thaw_limited_counters(struct cpu_hw_events *cpuhw, unsigned long pmc5, unsigned long pmc6) { struct perf_event *event; @@ -453,7 +453,7 @@ static void thaw_limited_events(struct cpu_hw_events *cpuhw, int i; for (i = 0; i < cpuhw->n_limited; ++i) { - event = cpuhw->limited_event[i]; + event = cpuhw->limited_counter[i]; event->hw.idx = cpuhw->limited_hwidx[i]; val = (event->hw.idx == 5) ? 
pmc5 : pmc6; atomic64_set(&event->hw.prev_count, val); @@ -495,9 +495,9 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) "i" (SPRN_PMC5), "i" (SPRN_PMC6)); if (mmcr0 & MMCR0_FC) - freeze_limited_events(cpuhw, pmc5, pmc6); + freeze_limited_counters(cpuhw, pmc5, pmc6); else - thaw_limited_events(cpuhw, pmc5, pmc6); + thaw_limited_counters(cpuhw, pmc5, pmc6); /* * Write the full MMCR0 including the event overflow interrupt @@ -653,7 +653,7 @@ void hw_perf_enable(void) continue; idx = hwc_index[i] + 1; if (is_limited_pmc(idx)) { - cpuhw->limited_event[n_lim] = event; + cpuhw->limited_counter[n_lim] = event; cpuhw->limited_hwidx[n_lim] = idx; ++n_lim; continue; @@ -702,7 +702,7 @@ static int collect_events(struct perf_event *group, int max_count, flags[n] = group->hw.event_base; events[n++] = group->hw.config; } - list_for_each_entry(event, &group->sibling_list, list_entry) { + list_for_each_entry(event, &group->sibling_list, group_entry) { if (!is_software_event(event) && event->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) @@ -742,7 +742,7 @@ int hw_perf_group_sched_in(struct perf_event *group_leader, return 0; cpuhw = &__get_cpu_var(cpu_hw_events); n0 = cpuhw->n_events; - n = collect_events(group_leader, ppmu->n_event - n0, + n = collect_events(group_leader, ppmu->n_counter - n0, &cpuhw->event[n0], &cpuhw->events[n0], &cpuhw->flags[n0]); if (n < 0) @@ -764,7 +764,7 @@ int hw_perf_group_sched_in(struct perf_event *group_leader, cpuctx->active_oncpu += n; n = 1; event_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { + list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { if (sub->state != PERF_EVENT_STATE_OFF) { event_sched_in(sub, cpu); ++n; @@ -797,7 +797,7 @@ static int power_pmu_enable(struct perf_event *event) */ cpuhw = &__get_cpu_var(cpu_hw_events); n0 = cpuhw->n_events; - if (n0 >= ppmu->n_event) + if (n0 >= ppmu->n_counter) goto out; cpuhw->event[n0] = event; cpuhw->events[n0] = event->hw.config; @@ -848,11 +848,11 @@ static void power_pmu_disable(struct perf_event *event) } } for (i = 0; i < cpuhw->n_limited; ++i) - if (event == cpuhw->limited_event[i]) + if (event == cpuhw->limited_counter[i]) break; if (i < cpuhw->n_limited) { while (++i < cpuhw->n_limited) { - cpuhw->limited_event[i-1] = cpuhw->limited_event[i]; + cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; } --cpuhw->n_limited; @@ -1078,7 +1078,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) */ n = 0; if (event->group_leader != event) { - n = collect_events(event->group_leader, ppmu->n_event - 1, + n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) return ERR_PTR(-EINVAL); @@ -1230,7 +1230,7 @@ static void perf_event_interrupt(struct pt_regs *regs) int nmi; if (cpuhw->n_limited) - freeze_limited_events(cpuhw, mfspr(SPRN_PMC5), + freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), mfspr(SPRN_PMC6)); perf_read_regs(regs); @@ -1260,7 +1260,7 @@ static void perf_event_interrupt(struct pt_regs *regs) * Any that we processed in the previous loop will not be negative. 
*/ if (!found) { - for (i = 0; i < ppmu->n_event; ++i) { + for (i = 0; i < ppmu->n_counter; ++i) { if (is_limited_pmc(i + 1)) continue; val = read_pmc(i + 1); -- cgit From 88e9d34c727883d7d6f02cf1475b3ec98b8480c7 Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 22 Sep 2009 16:43:43 -0700 Subject: seq_file: constify seq_operations Make all seq_operations structs const, to help mitigate against revectoring user-triggerable function pointers. This is derived from the grsecurity patch, although generated from scratch because it's simpler than extracting the changes from there. Signed-off-by: James Morris Acked-by: Serge Hallyn Acked-by: Casey Schaufler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/setup-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 02fed27af7f6..1d5570a1e456 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -328,7 +328,7 @@ static void c_stop(struct seq_file *m, void *v) { } -struct seq_operations cpuinfo_op = { +const struct seq_operations cpuinfo_op = { .start =c_start, .next = c_next, .stop = c_stop, -- cgit From f063ea02fba5782099b6730d5733ee44638df8f9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:45 -0600 Subject: cpumask: arch_send_call_function_ipi_mask: powerpc We're weaning the core code off handing cpumask's around on-stack. This introduces arch_send_call_function_ipi_mask(), and by defining it, the old arch_send_call_function_ipi is defined by the core code. Signed-off-by: Rusty Russell --- arch/powerpc/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d387b3937ccc..7f68ceb3bdb8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -189,11 +189,11 @@ void arch_send_call_function_single_ipi(int cpu) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { unsigned int cpu; - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, mask) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); } -- cgit From ea0f1cab6ed43121ff6f24c1bb02e88a8d11a2d6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:48 -0600 Subject: cpumask: Use accessors for cpu_*_mask: powerpc Use the accessors rather than frobbing bits directly (the new versions are const). 
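A rough sketch of the accessor pattern (an editor's illustration drawn from the hunks below, not code taken from the patch itself):

    set_cpu_present(cpu, true);    /* was: cpu_set(cpu, cpu_present_map)   */
    set_cpu_possible(cpu, true);   /* was: cpu_set(cpu, cpu_possible_map)  */
    set_cpu_online(cpu, false);    /* was: cpu_clear(cpu, cpu_online_map)  */

The helpers take an explicit bool, so callers no longer touch the map objects directly and the core code is free to change how the masks are represented.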
Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/powerpc/kernel/setup-common.c | 6 +++--- arch/powerpc/kernel/smp.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1d5570a1e456..9837d9153c4b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -432,9 +432,9 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, intserv[j]); - cpu_set(cpu, cpu_present_map); + set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, intserv[j]); - cpu_set(cpu, cpu_possible_map); + set_cpu_possible(cpu, true); cpu++; } } @@ -480,7 +480,7 @@ void __init smp_setup_cpu_maps(void) maxcpus); for (cpu = 0; cpu < maxcpus; cpu++) - cpu_set(cpu, cpu_possible_map); + set_cpu_possible(cpu, true); out: of_node_put(dn); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7f68ceb3bdb8..9b86a74d2815 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -287,7 +287,7 @@ void __devinit smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); - cpu_set(boot_cpuid, cpu_online_map); + set_cpu_online(boot_cpuid, true); cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid)); cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid)); #ifdef CONFIG_PPC64 @@ -307,7 +307,7 @@ int generic_cpu_disable(void) if (cpu == boot_cpuid) return -EBUSY; - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; fixup_irqs(cpu_online_map); @@ -361,7 +361,7 @@ void generic_mach_cpu_die(void) smp_wmb(); while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); local_irq_enable(); } #endif @@ -508,7 +508,7 @@ int __devinit start_secondary(void *unused) ipi_call_lock(); notify_cpu_starting(cpu); - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); for (i = 0; i < threads_per_core; i++) { -- cgit From 2bcd57ab61e7cabed626226a3771617981c11ce1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 24 Sep 2009 04:22:25 +0400 Subject: headers: utsname.h redux * remove asm/atomic.h inclusion from linux/utsname.h -- not needed after kref conversion * remove linux/utsname.h inclusion from files which do not need it NOTE: it looks like fs/binfmt_elf.c do not need utsname.h, however due to some personality stuff it _is_ needed -- cowardly leave ELF-related headers and files alone. 
Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/kernel/sys_ppc32.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1d5570a1e456..74cd1a7d0d4b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 1cc5e9e5da96..b97c2d67f4ac 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include -- cgit From 0f3372741f2de8dc85a60be737b519a47b395b85 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Wed, 9 Sep 2009 05:02:24 +0000 Subject: powerpc: kmalloc failure ignored in vio_build_iommu_table() Prevent NULL dereference if kmalloc() fails. Signed-off-by: Roel Kluin Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index bc7b41edbdfc..1b4f674ad7c4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1054,6 +1054,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) return NULL; tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); + if (tbl == NULL) + return NULL; of_parse_dma_window(dev->dev.archdata.of_node, dma_window, &tbl->it_index, &offset, &size); -- cgit From 5c8f382c0b96aedcd709c05eae13bd684e16417e Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 16 Sep 2009 03:08:58 +0000 Subject: powerpc/book3e-64: Remove duplicated #include Remove duplicated #include('s) in arch/powerpc/kernel/exceptions-64e.S Signed-off-by: Huang Weiyi Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9048f96237f6..24dcc0ecf246 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit From 1cebd7a0f62804ca24f7b7b35e8105000b9e879a Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 21 Sep 2009 08:26:34 +0000 Subject: powerpc: Rename get_dma_direct_offset get_dma_offset The former is no longer really accurate with the swiotlb case now a possibility. I also move it into dma-mapping.h - it no longer needs to be in dma.c, and there are about to be some more accessors that should all end up in the same place. A comment is added to indicate that this function is not used in configs where there is no simple dma offset, such as the iommu case. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 21b784d7e7d0..6215062caf8c 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -21,13 +21,6 @@ * default the offset is PCI_DRAM_OFFSET. 
*/ -unsigned long get_dma_direct_offset(struct device *dev) -{ - if (dev) - return (unsigned long)dev->archdata.dma_data; - - return PCI_DRAM_OFFSET; -} void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) @@ -37,7 +30,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; - *dma_handle += get_dma_direct_offset(dev); + *dma_handle += get_dma_offset(dev); return ret; #else struct page *page; @@ -51,7 +44,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, return NULL; ret = page_address(page); memset(ret, 0, size); - *dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev); + *dma_handle = virt_to_abs(ret) + get_dma_offset(dev); return ret; #endif @@ -75,7 +68,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int i; for_each_sg(sgl, sg, nents, i) { - sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); + sg->dma_address = sg_phys(sg) + get_dma_offset(dev); sg->dma_length = sg->length; __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } @@ -110,7 +103,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, { BUG_ON(dir == DMA_NONE); __dma_sync_page(page, offset, size, dir); - return page_to_phys(page) + offset + get_dma_direct_offset(dev); + return page_to_phys(page) + offset + get_dma_offset(dev); } static inline void dma_direct_unmap_page(struct device *dev, -- cgit From 738ef42e32fe95553a424c04016b936c9f6c9afb Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 21 Sep 2009 08:26:35 +0000 Subject: powerpc: Change archdata dma_data to a union Sometimes this is used to hold a simple offset, and sometimes it is used to hold a pointer. This patch changes it to a union containing void * and dma_addr_t. get/set accessors are also provided, because it was getting a bit ugly to get to the actual data. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 16 ++++++++-------- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/vio.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 87ddb3fb948c..37771a518119 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -18,7 +18,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, + return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, device_to_mask(dev), flag, dev_to_node(dev)); } @@ -26,7 +26,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle); + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); } /* Creates TCEs for a user provided buffer. 
The user buffer must be @@ -39,8 +39,8 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_page(dev, dev->archdata.dma_data, page, offset, size, - device_to_mask(dev), direction, attrs); + return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, + size, device_to_mask(dev), direction, attrs); } @@ -48,7 +48,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_page(dev->archdata.dma_data, dma_handle, size, direction, + iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, attrs); } @@ -57,7 +57,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, + return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); } @@ -65,14 +65,14 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, + iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); } /* We support DMA to/from any memory page via the iommu */ static int dma_iommu_dma_supported(struct device *dev, u64 mask) { - struct iommu_table *tbl = dev->archdata.dma_data; + struct iommu_table *tbl = get_iommu_table_base(dev); if (!tbl || tbl->it_offset > mask) { printk(KERN_INFO diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e9f4840096b3..bb8209e34931 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1117,7 +1117,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) /* Hook up default DMA ops */ sd->dma_ops = pci_dma_ops; - sd->dma_data = (void *)PCI_DRAM_OFFSET; + set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ if (ppc_md.pci_dma_dev_setup) diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 1b4f674ad7c4..77f64218abf3 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1235,7 +1235,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) vio_cmo_set_dma_ops(viodev); else viodev->dev.archdata.dma_ops = &dma_iommu_ops; - viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); + set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); /* init generic 'struct device' fields: */ -- cgit From 8bbde7a7062facf8af35bcc9a64cbafe8f36f3cf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 16:52:35 +0000 Subject: powerpc: Move 64bit heap above 1TB on machines with 1TB segments If we are using 1TB segments and we are allowed to randomise the heap, we can put it above 1TB so it is backed by a 1TB segment. Otherwise the heap will be in the bottom 1TB which always uses 256MB segments and this may result in a performance penalty. This functionality is disabled when heap randomisation is turned off: echo 1 > /proc/sys/kernel/randomize_va_space which may be useful when trying to allocate the maximum amount of 16M or 16G pages. 
On a microbenchmark that repeatedly touches 32GB of memory with a stride of 256MB + 4kB (designed to stress 256MB segments while still mapping nicely into the L1 cache), we see the improvement: Force malloc to use heap all the time: # export MALLOC_MMAP_MAX_=0 MALLOC_TRIM_THRESHOLD_=-1 Disable heap randomization: # echo 1 > /proc/sys/kernel/randomize_va_space # time ./test 12.51s Enable heap randomization: # echo 2 > /proc/sys/kernel/randomize_va_space # time ./test 1.70s Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0a3216433051..1168c5f440ab 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1165,7 +1165,22 @@ static inline unsigned long brk_rnd(void) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + unsigned long base = mm->brk; + unsigned long ret; + +#ifdef CONFIG_PPC64 + /* + * If we are using 1TB segments and we are allowed to randomise + * the heap, we can put it above 1TB so it is backed by a 1TB + * segment. Otherwise the heap will be in the bottom 1TB + * which always uses 256MB segments and this may result in a + * performance penalty. + */ + if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); +#endif + + ret = PAGE_ALIGN(base + brk_rnd()); if (ret < mm->brk) return mm->brk; -- cgit From f2053f1a7bf6005b4e81826b1ac8d0b4117c4cf0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 16:57:40 +0000 Subject: powerpc/perf_counter: Fix vdso detection perf_counter uses arch_vma_name() to detect a vdso region which in turn uses current->mm->context.vdso_base. We need to initialise this before doing the mmap or else we fail to detect the vdso. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vdso.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 3faaf29bdb29..94e2df3cae07 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -240,6 +240,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto fail_mmapsem; } + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + current->mm->context.vdso_base = vdso_base; + /* * our vma flags don't have VM_WRITE so by default, the process isn't * allowed to write those pages. @@ -260,11 +267,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, vdso_pagelist); - if (rc) + if (rc) { + current->mm->context.vdso_base = 0; goto fail_mmapsem; - - /* Put vDSO base into mm struct */ - current->mm->context.vdso_base = vdso_base; + } up_write(&mm->mmap_sem); return 0; -- cgit From 049d0497060bc8db944f7b4984271327448b3603 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 20:47:39 +0000 Subject: powerpc: Fix ibm,client-architecture-support printout On machines without the ibm,client-architecture-support call we were missing a newline. 
We may as well print the full name in all its glory too - its ibm,client-architecture-support, not ibm,client-architecture as I mistakenly wrote (a name only an IBM architect could love). For my penance I will write out ibm,client-architecture-support 100 times. Before: Calling ibm,client-architecture...command line: root=/dev/sda6 console=hvc0 quiet After: Calling ibm,client-architecture-support... not implemented command line: root=/dev/sda6 console=hvc0 Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 864334b337a3..bafac2e41ae1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -800,7 +800,7 @@ static void __init prom_send_capabilities(void) root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* try calling the ibm,client-architecture-support method */ - prom_printf("Calling ibm,client-architecture..."); + prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, @@ -814,6 +814,7 @@ static void __init prom_send_capabilities(void) return; } call_prom("close", 1, 0, root); + prom_printf(" not implemented\n"); } /* no ibm,client-architecture-support call, try the old way */ -- cgit From 142597dbbd8a1d516af3dacfa00037f21612e865 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 22 Sep 2009 05:18:09 +0000 Subject: powerpc: Cleanup linker script using new linker script macros. Signed-off-by: Tim Abbott Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@ozlabs.org Cc: Sam Ravnborg Acked-by: Sam Ravnborg Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vmlinux.lds.S | 69 +++++++++------------------------------ 1 file changed, 15 insertions(+), 54 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 58da4070723d..f56429362a12 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -6,6 +6,7 @@ #include #include #include +#include ENTRY(_stext) @@ -71,12 +72,7 @@ SECTIONS /* Read-only data */ RODATA - /* Exception & bug tables */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(0) NOTES :kernel :notes @@ -93,12 +89,7 @@ SECTIONS */ . = ALIGN(PAGE_SIZE); __init_begin = .; - - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } :kernel + INIT_TEXT_SECTION(PAGE_SIZE) :kernel /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table @@ -122,23 +113,16 @@ SECTIONS #endif } - . = ALIGN(16); .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; + INIT_SETUP(16) } .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } + INIT_CALLS + } .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + CON_INITCALL } SECURITY_INIT @@ -169,14 +153,10 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif -#ifdef CONFIG_BLK_DEV_INITRD - . 
= ALIGN(PAGE_SIZE); .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; + INIT_RAM_FS } -#endif + PERCPU(PAGE_SIZE) . = ALIGN(8); @@ -240,36 +220,24 @@ SECTIONS #endif /* The initial task and kernel stack */ -#ifdef CONFIG_PPC32 - . = ALIGN(8192); -#else - . = ALIGN(16384); -#endif .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) + INIT_TASK_DATA(THREAD_SIZE) } - . = ALIGN(PAGE_SIZE); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) + PAGE_ALIGNED_DATA(PAGE_SIZE) } - . = ALIGN(L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) } - . = ALIGN(L1_CACHE_BYTES); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) + READ_MOSTLY_DATA(L1_CACHE_BYTES) } - . = ALIGN(PAGE_SIZE); .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; + NOSAVE_DATA } . = ALIGN(PAGE_SIZE); @@ -280,14 +248,7 @@ SECTIONS * And finally the bss */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __bss_start = .; - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - __bss_stop = .; - } + BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); _end = . ; -- cgit
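As a reference point for the macro-based style, the open-coded .init.setup block removed above shows what the generic linker-script header's macro stands for; assuming the shared definition mirrors the block it replaces here, INIT_SETUP(16) expands to roughly:

    . = ALIGN(16);
    __setup_start = .;
    *(.init.setup)
    __setup_end = .;

so each architecture's script shrinks to a single macro invocation while the emitted symbols and alignment stay identical to the old hand-written version.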