From 8eb9803723a14fd12675641b953e4ccbd86187a8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 29 May 2016 22:03:50 +1000 Subject: powerpc: Avoid load hit store in __giveup_fpu() and __giveup_altivec() In both __giveup_fpu() and __giveup_altivec() we make two modifications to tsk->thread.regs->msr. gcc decides to do a read/modify/write of each change, so we end up with a load hit store: ld r9,264(r10) rldicl r9,r9,50,1 rotldi r9,r9,14 std r9,264(r10) ... ld r9,264(r10) rldicl r9,r9,40,1 rotldi r9,r9,24 std r9,264(r10) Fix this by using a temporary. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..a2dd3b1276ff 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -139,12 +139,16 @@ EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU void __giveup_fpu(struct task_struct *tsk) { + unsigned long msr; + save_fpu(tsk); - tsk->thread.regs->msr &= ~MSR_FP; + msr = tsk->thread.regs->msr; + msr &= ~MSR_FP; #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) - tsk->thread.regs->msr &= ~MSR_VSX; + msr &= ~MSR_VSX; #endif + tsk->thread.regs->msr = msr; } void giveup_fpu(struct task_struct *tsk) @@ -219,12 +223,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; } static void __giveup_altivec(struct task_struct *tsk) { + unsigned long msr; + save_altivec(tsk); - tsk->thread.regs->msr &= ~MSR_VEC; + msr = tsk->thread.regs->msr; + msr &= ~MSR_VEC; #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) - tsk->thread.regs->msr &= ~MSR_VSX; + msr &= ~MSR_VSX; #endif + tsk->thread.regs->msr = msr; } void giveup_altivec(struct task_struct *tsk) -- cgit From d96f234f47aff593538f9e3d674967078f56bc28 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 29 May 2016 22:03:51 +1000 Subject: powerpc: Avoid load hit store in setup_sigcontext() In setup_sigcontext(), we set current->thread.vrsave then use it straight after. Since current is hidden from the compiler via inline assembly, it cannot optimise this and we end up with a load hit store. Fix this by using a temporary. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/signal_64.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 25520794aa37..7e49984d4331 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -104,6 +104,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, */ #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); + unsigned long vrsave; #endif unsigned long msr = regs->msr; long err = 0; @@ -125,9 +126,13 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. 
*/ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + vrsave = 0; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + vrsave = mfspr(SPRN_VRSAVE); + current->thread.vrsave = vrsave; + } + + err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ -- cgit From e289086f6530dd85d88967bfceded98bdbcd7f41 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 May 2016 10:45:49 +0200 Subject: powerpc/32: Get rid of sub_reloc_offset() sub_reloc_offset() has not been used since commit 917f0af9e5a9 ("powerpc: Remove arch/ppc and include/asm-ppc") which removed include/asm-ppc/prom.h. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc_32.S | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 285ca8c6cc2e..d9c912b6e632 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -103,20 +103,6 @@ _GLOBAL(mulhdu) addze r3,r10 blr -/* - * sub_reloc_offset(x) returns x - reloc_offset(). - */ -_GLOBAL(sub_reloc_offset) - mflr r0 - bl 1f -1: mflr r5 - lis r4,1b@ha - addi r4,r4,1b@l - subf r5,r4,r5 - subf r3,r5,r3 - mtlr r0 - blr - /* * reloc_got2 runs through the .got2 section adding an offset * to each entry. -- cgit From 027dfac694fc27ef0273afb810d9b1f9da57d6e1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 1 Jun 2016 16:34:37 +1000 Subject: powerpc: Various typo fixes Signed-off-by: Andrea Gelmini Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_6xx.S | 2 +- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 +- arch/powerpc/kernel/pci_64.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/rtas-proc.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8cd9fba4d35..c5e5a94d9892 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -156,7 +156,7 @@ setup_7410_workarounds: blr /* 740/750/7400/7410 - * Enable Store Gathering (SGE), Address Brodcast (ABE), + * Enable Store Gathering (SGE), Address Broadcast (ABE), * Branch History Table (BHTE), Branch Target ICache (BTIC) * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..389b0d3988dc 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -139,7 +139,7 @@ static void eeh_enable_irq(struct pci_dev *dev) * into it. * * That's just wrong.The warning in the core code is - * there to tell people to fix their assymetries in + * there to tell people to fix their asymmetries in * their own code, not by abusing the core information * to avoid it. 
* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 488e6314f993..2d3b40fd9bac 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -453,7 +453,7 @@ exc_##n##_bad_stack: \ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \ b bad_stack_book3e; /* bad stack error */ -/* WARNING: If you change the layout of this stub, make sure you chcek +/* WARNING: If you change the layout of this stub, make sure you check * the debug exception handler which handles single stepping * into exceptions from userspace, and the MM code in * arch/powerpc/mm/tlb_nohash.c which patches the branch here diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..f71b79a8992b 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -82,7 +82,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) /* If this is not a PHB, we only flush the hash table over * the area mapped by this bridge. We don't play with the PTE - * mappings since we might have to deal with sub-page alignemnts + * mappings since we might have to deal with sub-page alignments * so flushing the hash table is the only sane way to make sure * that no hash entries are covering that removed bridge area * while still allowing other busses overlapping those pages diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a2dd3b1276ff..c5c3ae2ef3c1 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -802,7 +802,7 @@ static void tm_reclaim_thread(struct thread_struct *thr, * this state. * We do this using the current MSR, rather tracking it in * some specific thread_struct bit, as it has the additional - * benifit of checking for a potential TM bad thing exception. + * benefit of checking for a potential TM bad thing exception. */ if (!MSR_TM_SUSPENDED(mfmsr())) return; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index fb2fb3ea85e5..c82eed97bd22 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -698,7 +698,7 @@ static void check_location(struct seq_file *m, const char *c) /* * Format: * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] - * the '.' may be an abbrevation + * the '.' may be an abbreviation */ static void check_location_string(struct seq_file *m, const char *c) { -- cgit From f55d966536034d33476fdd43c45d47225344469f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Jun 2016 22:26:10 +0530 Subject: powerpc: Define and use PPC64_ELF_ABI_v2/v1 We're approaching 20 locations where we need to check for ELF ABI v2. That's fine, except the logic is a bit awkward, because we have to check that _CALL_ELF is defined and then what its value is. So check it once in asm/types.h and define PPC64_ELF_ABI_v2 when ELF ABI v2 is detected. We also have a few places where what we're really trying to check is that we are using the 64-bit v1 ABI, ie. function descriptors. So also add a #define for that, which simplifies several checks. Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/ftrace.c | 4 ++-- arch/powerpc/kernel/head_64.S | 2 +- arch/powerpc/kernel/kprobes.c | 6 ++---- arch/powerpc/kernel/misc_64.S | 2 +- arch/powerpc/kernel/module_64.c | 4 ++-- 6 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 73e461a3dfbb..2e0c565754aa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -453,7 +453,7 @@ _GLOBAL(ret_from_kernel_thread) REST_NVGPRS(r1) mtlr r14 mr r3,r15 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif blrl diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1123a4d8d8dd..7af6c4de044b 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -608,7 +608,7 @@ unsigned long __init arch_syscall_addr(int nr) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ -#if defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) +#ifdef PPC64_ELF_ABI_v1 char *arch_ftrace_match_adjust(char *str, const char *search) { if (str[0] == '.' && search[0] != '.') @@ -616,4 +616,4 @@ char *arch_ftrace_match_adjust(char *str, const char *search) else return str; } -#endif /* defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) */ +#endif /* PPC64_ELF_ABI_v1 */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2d14774af6b4..064cd9397836 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -401,7 +401,7 @@ generic_secondary_common_init: ld r12,CPU_SPEC_RESTORE(r23) cmpdi 0,r12,0 beq 3f -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 ld r12,0(r12) #endif mtctr r12 diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7c053f281406..7d48e3baa38b 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -506,12 +506,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->nip = arch_deref_entry_point(jp->entry); -#ifdef CONFIG_PPC64 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 regs->gpr[12] = (unsigned long)jp->entry; -#else +#elif defined(PPC64_ELF_ABI_v1) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); -#endif #endif return 1; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index f28754c497e5..7a8519052b14 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -661,7 +661,7 @@ _GLOBAL(kexec_sequence) #ifndef CONFIG_PPC_BOOK3E /* clear out hardware hash page table and tlb */ -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 ld r12,0(r27) /* deref function descriptor */ #else mr r12,r27 diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 9ce9a25f58b5..f703f343358e 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -41,7 +41,7 @@ this, and makes other things simpler. Anton? --RR. */ -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -132,7 +132,7 @@ static u32 ppc64_stub_insns[] = { /* Save current r2 value in magic place on the stack. 
*/ 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */ 0xe98b0020, /* ld r12,32(r11) */ -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ 0xe84b0028, /* ld r2,40(r11) */ #endif -- cgit From 6e45273eacc829a44fae1d3df14065d6947335ae Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 20 May 2016 16:18:57 +0200 Subject: powerpc/pseries: Fix trivial typo in function name Signed-off-by: Greg Kurz Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtasd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index c638e2487a9c..e864b7c5884e 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -483,7 +483,7 @@ static void rtas_event_scan(struct work_struct *w) } #ifdef CONFIG_PPC64 -static void retreive_nvram_error_log(void) +static void retrieve_nvram_error_log(void) { unsigned int err_type ; int rc ; @@ -501,7 +501,7 @@ static void retreive_nvram_error_log(void) } } #else /* CONFIG_PPC64 */ -static void retreive_nvram_error_log(void) +static void retrieve_nvram_error_log(void) { } #endif /* CONFIG_PPC64 */ @@ -513,7 +513,7 @@ static void start_event_scan(void) (30000 / rtas_event_scan_rate)); /* Retrieve errors from nvram if any */ - retreive_nvram_error_log(); + retrieve_nvram_error_log(); schedule_delayed_work_on(cpumask_first(cpu_online_mask), &event_scan_work, event_scan_delay); -- cgit From aac6a91fea93e6bdd7ac20365d7ecc9187ca61da Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Thu, 2 Jun 2016 08:56:47 +1000 Subject: powerpc/asm: Remove unused symbols in asm-offsets.c THREAD_DSCR: Added in efcac6589a27 "powerpc: Per process DSCR + some fixes (try#4)" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_DSCR_INHERIT: Added in 714332858bfd "powerpc: Restore correct DSCR in context switch" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_TAR: Added in 2468dcf641e4 "powerpc: Add support for context switching the TAR register" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_BESCR, THREAD_EBBHR and THREAD_EBBRR: Added in 9353374b8e15 "powerpc: Context switch the new EBB SPRs" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_SIAR, THREAD_SDAR, THREAD_SIER, THREAD_MMCR0, and THREAD_MMCR2: Added in 59affcd3e460 "powerpc: Context switch more PMU related SPRs" Last usage removed in b11ae95100f7 "powerpc: Partial revert of "Context switch more PMU related SPRs"" PACA_LOCK_TOKEN: Added in 9e368f291560 "KVM: PPC: book3s_hv: Add support for PPC970-family processors" Last usage removed in c17b98cf6028 "KVM: PPC: Book3S HV: Remove code for PPC970 processors" HCALL_STAT_SIZE, HCALL_STAT_CALLS, HCALL_STAT_TB and HCALL_STAT_PURR: Added in 57852a853b0d "[POWERPC] powerpc: Instrument Hypervisor Calls" Last usage removed in c8cd093a6e9f "powerpc: tracing: Add hypervisor call tracepoints" VCPU_EPLC: Added in d30f6e480055 "KVM: PPC: booke: category E.HV (GS-mode) support" Never used. CPU_DOWN_FLUSH: Added in e7affb1dba0e "powerpc/cache: add cache flush operation for various e500" Never used. CFG_STAMP_XSEC: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." 
Last usage removed in 0e469db8f70c "powerpc: Rework VDSO gettimeofday to prevent time going backwards" KVM_LPCR: Added in aa04b4cc5be6 "KVM: PPC: Allocate RMAs (Real Mode Areas) at boot for use by guests" Last usage removed in a0144e2a6b0b "KVM: PPC: Book3S HV: Store LPCR value for each virtual core" GPR15, GPR16, GPR17, GPR18, GPR19, GPR20, GPR21, GPR22, GPR23, GPR24, GPR25, GPR26, GPR27, GPR28, GPR29, GPR30 and GPR31: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Never used. VCPU_SHADOW_FSCR: Added in 616dff860282 "KVM: PPC: Book3S PR: Handle Facility interrupt and FSCR" Never used. VCPU_SHADOW_SRR1: Added in a2d56020d1d9 "KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu" Never used. KVM_SPLIT_SIZE: Added in b4deba5c41e9 "KVM: PPC: Book3S HV: Implement dynamicmicro-threading on POWER8" Never used. VCPU_VCPUID: Added in de56a948b918 "KVM: PPC: Add support for Book3S processors in hypervisor mode" Last usage removed 1b400ba0cd24 "KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations" _MQ: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Never used. AUDITCONTEXT: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Last usage removed in 401d1f029beb "[PATCH] syscall entry/exit revamp" CLONE_VM: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Currently unused. CLONE_UNTRACED: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Currently unused. Signed-off-by: Rashmica Gupta [mpe: Munge change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 49 --------------------------------------- 1 file changed, 49 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9ea09551a2cd..5b99f956e32f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,11 +74,8 @@ int main(void) DEFINE(MM, offsetof(struct task_struct, mm)); DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 - DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); - DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); - DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); @@ -132,17 +129,6 @@ int main(void) DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar)); - DEFINE(THREAD_BESCR, offsetof(struct thread_struct, bescr)); - DEFINE(THREAD_EBBHR, offsetof(struct thread_struct, ebbhr)); - DEFINE(THREAD_EBBRR, offsetof(struct thread_struct, ebbrr)); - DEFINE(THREAD_SIAR, offsetof(struct thread_struct, siar)); - DEFINE(THREAD_SDAR, offsetof(struct thread_struct, sdar)); - DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); - DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); - DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); @@ -178,7 +164,6 @@ int main(void) DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); /* 
paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); @@ -275,12 +260,6 @@ int main(void) /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); - - /* hcall statistics */ - DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats)); - DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls)); - DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total)); - DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total)); #endif /* CONFIG_PPC64 */ DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); @@ -298,23 +277,6 @@ int main(void) DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); #ifndef CONFIG_PPC64 DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); - DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); - DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); - DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17])); - DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18])); - DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19])); - DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); - DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); - DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); - DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); - DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24])); - DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25])); - DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26])); - DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27])); - DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28])); - DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); - DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); - DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); #endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special @@ -332,7 +294,6 @@ int main(void) DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); #ifndef CONFIG_PPC64 - DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); /* * The PowerPC 400-class & Book-E processors have neither the DAR * nor the DSISR SPRs. 
Hence, we overload them to hold the similar @@ -369,8 +330,6 @@ int main(void) DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif - DEFINE(CLONE_VM, CLONE_VM); - DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); #ifndef CONFIG_PPC64 DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); @@ -380,7 +339,6 @@ int main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); - DEFINE(CPU_DOWN_FLUSH, offsetof(struct cpu_spec, cpu_down_flush)); DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); @@ -395,7 +353,6 @@ int main(void) DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); @@ -517,7 +474,6 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); - DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); @@ -528,7 +484,6 @@ int main(void) DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); @@ -566,7 +521,6 @@ int main(void) DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); - DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr)); DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); @@ -576,7 +530,6 @@ int main(void) DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); - DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); @@ -693,7 +646,6 @@ int main(void) DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar)); DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); - DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size)); DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped)); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -756,7 +708,6 @@ 
int main(void) #ifdef CONFIG_KVM_BOOKE_HV DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); - DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc)); #endif #ifdef CONFIG_KVM_EXIT_TIMING -- cgit From 1d1451655bad9a6a5fd7a42de68420069ce3bee3 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 11 May 2016 10:57:32 +1000 Subject: powerpc: Add array bounds checking to crash_shutdown_handlers The array crash_shutdown_handles is an array of size CRASH_HANDLER_MAX+1 containing up to CRASH_HANDLER_MAX shutdown_handlers. It is assumed to be NULL terminated, which it is under normal circumstances. Array accesses in the functions crash_shutdown_unregister() and default_machine_crash_shutdown() rely on this NULL termination property when traversing this list and don't protect again out of bounds accesses. If the NULL terminator were somehow overwritten these functions could potentially access out of the bounds of the array. Shrink the array to size CRASH_HANDLER_MAX and implement explicit array bounds checking when accessing the elements of the crash_shutdown_handles[] array in crash_shutdown_unregister() and default_machine_crash_shutdown(). Signed-off-by: Suraj Jitindar Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 2bb252c01f07..3dc1fade0b28 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -48,8 +48,8 @@ int crashing_cpu = -1; static int time_to_dump; #define CRASH_HANDLER_MAX 3 -/* NULL terminated list of shutdown handles */ -static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; +/* List of shutdown handles */ +static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; static DEFINE_SPINLOCK(crash_handlers_lock); static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; @@ -288,9 +288,14 @@ int crash_shutdown_unregister(crash_shutdown_t handler) rc = 1; } else { /* Shift handles down */ - for (; crash_shutdown_handles[i]; i++) + for (; i < (CRASH_HANDLER_MAX - 1); i++) crash_shutdown_handles[i] = crash_shutdown_handles[i+1]; + /* + * Reset last entry to NULL now that it has been shifted down, + * this will allow new handles to be added here. + */ + crash_shutdown_handles[i] = NULL; rc = 0; } @@ -346,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; crash_shutdown_cpu = smp_processor_id(); - for (i = 0; crash_shutdown_handles[i]; i++) { + for (i = 0; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* * Insert syncs and delay to ensure -- cgit From 34852ed5511ec5d07897f22d5607061a248fc82f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:49 +1000 Subject: powerpc/sparse: make some things static This is just a smattering of things picked up by sparse that should be made static. 
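For reference, the sparse warnings driving this kind of change look roughly like the following (symbol name taken from the diff below; the file/line prefix is elided here):

    warning: symbol 'crash_ipi_callback' was not declared. Should it be static?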
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash.c | 2 +- arch/powerpc/kernel/sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3dc1fade0b28..888bdf198c3e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -65,7 +65,7 @@ static int handle_fault(struct pt_regs *regs) #ifdef CONFIG_SMP static atomic_t cpus_in_crash; -void crash_ipi_callback(struct pt_regs *regs) +static void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 692873bff334..c4f1d1f7bae0 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_PPC64 /* Time in microseconds we delay before sleeping in the idle loop */ -DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, -- cgit From 42f5b4cacd783faf05e3ff8bf85e8be31f3dfa9d Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:50 +1000 Subject: powerpc: Introduce asm-prototypes.h Sparse picked up a number of functions that are implemented in C and then only referred to in asm code. This introduces asm-prototypes.h, which provides a place for prototypes of these functions. This silences some sparse warnings. Signed-off-by: Daniel Axtens [mpe: Add include guards, clean up copyright & GPL text] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec_64.c | 1 + arch/powerpc/kernel/smp.c | 1 + arch/powerpc/kernel/traps.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index b8c202d63ecb..50bf55135ef8 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3E int default_machine_kexec_prepare(struct kimage *image) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 55c924b65f71..f1adc3c4f4ca 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef DEBUG #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9229ba63c370..11d15e7270e0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) -- cgit From 665e87ffe1c400c525c3a4cd6fcb5db75972fadd Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:51 +1000 Subject: powerpc/sparse: Include headers containing prototypes Sometimes headers that provide prototypes for functions are accidentally omitted from the files that define the functions. Fix a couple of times that occurs. 
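A sketch of the shape of the asm-prototypes.h header introduced in the previous commit — the two declarations are illustrative picks (real functions in traps.c that are entered from assembly), not necessarily the exact set the commit added:

    /* arch/powerpc/include/asm/asm-prototypes.h -- illustrative sketch */
    #ifndef _ASM_POWERPC_ASM_PROTOTYPES_H
    #define _ASM_POWERPC_ASM_PROTOTYPES_H

    struct pt_regs;

    /* C functions called only from assembly, so no other header declares them */
    void system_reset_exception(struct pt_regs *regs);
    void machine_check_exception(struct pt_regs *regs);

    #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */

Files that define such functions then include this header, giving sparse (and the compiler) a visible prototype for each definition.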
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f1adc3c4f4ca..1b55c7864291 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include -- cgit From a9650e9bc53239c30c39f77d9d8541e84641298a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:52 +1000 Subject: powerpc/align: Use #ifdef __BIG_ENDIAN__ #else for REG_BYTE Sparse complains that it doesn't know what REG_BYTE is: arch/powerpc/kernel/align.c:313:29: error: undefined identifier 'REG_BYTE' REG_BYTE is defined differently based on whether we're compiling for LE, BE32 or BE64. Sparse apparently doesn't provide __BIG_ENDIAN__ or __LITTLE_ENDIAN__, which means we get no definition. Rather than check for __BIG_ENDIAN__ and then separately for __LITTLE_ENDIAN__, just switch the #ifdef to check for __BIG_ENDIAN__ and then #else we define the little endian version. Technically that's dicey because PDP_ENDIAN is also a possibility, but we already do it in a lot of places so one more hardly matters. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/align.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 8e7cb8e2b21a..d7ad66bc5bdf 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -228,9 +228,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) #else #define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) #endif -#endif - -#ifdef __LITTLE_ENDIAN__ +#else #define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3)))) #endif -- cgit From fb36e90736938d50fdaa1be7afdb21608d402c2b Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 17 Jun 2016 15:25:17 +1000 Subject: powerpc/pci: Fix SRIOV not building without EEH enabled On Book3E CPUs (and possibly other configs), it is possible to have SRIOV (CONFIG_PCI_IOV) set without CONFIG_EEH. The SRIOV code does not check for this, and if EEH is disabled, pci_dn.c fails to build. Fix this by gating the EEH-specific code in the SRIOV implementation behind CONFIG_EEH. 
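The fix below gates each call site directly with #ifdef CONFIG_EEH. A common alternative (not what this commit does) is a stubbed declaration in the header so callers need no #ifdef at all — sketched here using the eeh_dev_init() signature this series later settles on:

    #ifdef CONFIG_EEH
    struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
    #else
    static inline struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
    {
            return NULL;    /* EEH compiled out: no EEH device to create */
    }
    #endif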
Fixes: 39218cd0 ("powerpc/eeh: EEH device for VF") Reported-by: Michael Ellerman Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_dn.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index ecdccce78719..afeda26c2ebc 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -181,7 +181,9 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; +#ifdef CONFIG_EEH struct eeh_dev *edev; +#endif /* CONFIG_EEH */ int i; /* Only support IOV for now */ @@ -208,11 +210,13 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH /* Create the EEH device for the VF */ eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); edev = pdn_to_eeh_dev(pdn); BUG_ON(!edev); edev->physfn = pdev; +#endif /* CONFIG_EEH */ } #endif /* CONFIG_PCI_IOV */ @@ -266,12 +270,14 @@ void remove_dev_pci_data(struct pci_dev *pdev) pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; +#ifdef CONFIG_EEH /* Release EEH device for the VF */ edev = pdn_to_eeh_dev(pdn); if (edev) { pdn->edev = NULL; kfree(edev); } +#endif /* CONFIG_EEH */ if (!list_empty(&pdn->list)) list_del(&pdn->list); -- cgit From 61ed9cfb1b0951a3b4b98dd8bfb98eeb112cfee4 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 31 Mar 2016 17:10:40 -0300 Subject: powerpc/kprobes: Remove kretprobe_trampoline_holder. Fixes the following testsuite failure: $ sudo ./perf test -v kallsyms 1: vmlinux symtab matches kallsyms : --- start --- test child forked, pid 12489 Using /proc/kcore for kernel object code Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols 0xc00000000003d300: diff name v: .kretprobe_trampoline_holder k: kretprobe_trampoline Maps only in vmlinux: c00000000086ca38-c000000000879b6c 87ca38 [kernel].text.unlikely c000000000879b6c-c000000000bf0000 889b6c [kernel].meminit.text c000000000bf0000-c000000000c53264 c00000 [kernel].init.text c000000000c53264-d000000004250000 c63264 [kernel].exit.text d000000004250000-d000000004450000 0 [libcrc32c] d000000004450000-d000000004620000 0 [xfs] d000000004620000-d000000004680000 0 [autofs4] d000000004680000-d0000000046e0000 0 [x_tables] d0000000046e0000-d000000004780000 0 [ip_tables] d000000004780000-d0000000047e0000 0 [rng_core] d0000000047e0000-ffffffffffffffff 0 [pseries_rng] Maps in vmlinux with a different name in kallsyms: Maps only in kallsyms: d000000000000000-f000000000000000 1000000000010000 [kernel.kallsyms] f000000000000000-ffffffffffffffff 3000000000010000 [kernel.kallsyms] test child finished with -1 ---- end ---- vmlinux symtab matches kallsyms: FAILED! The problem is that the kretprobe_trampoline symbol looks like this: $ eu-readelf -s /boot/vmlinux G kretprobe_trampoline 2431: c000000001302368 24 NOTYPE LOCAL DEFAULT 37 kretprobe_trampoline_holder 2432: c00000000003d300 8 FUNC LOCAL DEFAULT 1 .kretprobe_trampoline_holder 97543: c00000000003d300 0 NOTYPE GLOBAL DEFAULT 1 kretprobe_trampoline Its type is NOTYPE, and its size is 0, and this is a problem because symbol-elf.c:dso__load_sym skips function symbols that are not STT_FUNC or STT_GNU_IFUNC (this is determined by elf_sym__is_function). 
Even if the type is changed to STT_FUNC, when dso__load_sym calls symbols__fixup_duplicate, the kretprobe_trampoline symbol is dropped in favour of .kretprobe_trampoline_holder because the latter has non-zero size (as determined by choose_best_symbol). With this patch, all vmlinux symbols match /proc/kallsyms and the testcase passes. Commit c1c355ce14c0 ("x86/kprobes: Get rid of kretprobe_trampoline_holder()") gets rid of kretprobe_trampoline_holder altogether on x86. This commit does the same on powerpc. This change introduces no regressions on the perf and ftracetest testsuite results. Reviewed-by: Naveen N. Rao Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/kprobes.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7d48e3baa38b..3ed8ec09b5c9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -278,12 +278,11 @@ no_kprobe: * - When the probed function returns, this probe * causes the handlers to fire */ -static void __used kretprobe_trampoline_holder(void) -{ - asm volatile(".global kretprobe_trampoline\n" - "kretprobe_trampoline:\n" - "nop\n"); -} +asm(".global kretprobe_trampoline\n" + ".type kretprobe_trampoline, @function\n" + "kretprobe_trampoline:\n" + "nop\n" + ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); /* * Called when the probe at kretprobe trampoline is hit -- cgit From 103b7827d977ea34c982e6a9d2f960f731f7ee76 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Fri, 4 Mar 2016 10:31:48 +0530 Subject: powerpc: Fix misleading comment in early_setup_secondary() Current comment in the early_setup_secondary() for paca->soft_enabled update is misleading. Comment should say to Mark interrupts "disabled" instead of "enabled". Fix the typo. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 96d4a2b23d0f..5530bb55a78b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -321,7 +321,7 @@ void __init early_setup(unsigned long dt_ptr) #ifdef CONFIG_SMP void early_setup_secondary(void) { - /* Mark interrupts enabled in PACA */ + /* Mark interrupts disabled in PACA */ get_paca()->soft_enabled = 0; /* Initialize the hash table or TLB handling */ -- cgit From b57bd2de8c6c9aa03f1b899edd6f5582cc8b5b08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 9 Jun 2016 12:31:08 +1000 Subject: powerpc: Improve FSCR init and context switching This fixes a few issues with FSCR init and switching. In commit 152d523e6307 ("powerpc: Create context switch helpers save_sprs() and restore_sprs()") we moved the setting of the FSCR register from inside an CPU_FTR_ARCH_207S section to inside just a CPU_FTR_ARCH_DSCR section. Hence we are setting FSCR on POWER6/7 where the FSCR doesn't exist. This is harmless but we shouldn't do it. Also, we can simplify the FSCR context switch. We don't need to go through the calculation involving dscr_inherit. We can just restore what we saved last time. We also set an initial value in INIT_THREAD, so that pid 1 which is cloned from that gets a sane value. Based on patch by Jack Miller. 
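The diff below is limited to arch/powerpc/kernel, so the INIT_THREAD change mentioned above is not visible; it amounts to seeding thread.fscr in arch/powerpc/include/asm/processor.h, roughly as follows (other fields elided; treat the exact facility mask as the commit's):

    #define INIT_THREAD  {                          \
            /* ... existing fields ... */           \
            .fscr = FSCR_TAR | FSCR_EBB,            \
    }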
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 12 ++++-------- arch/powerpc/kernel/traps.c | 3 ++- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c5c3ae2ef3c1..6d0a831bc7d8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1031,18 +1031,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; - u64 fscr = old_thread->fscr & ~FSCR_DSCR; - - if (new_thread->dscr_inherit) { + if (new_thread->dscr_inherit) dscr = new_thread->dscr; - fscr |= FSCR_DSCR; - } if (old_thread->dscr != dscr) mtspr(SPRN_DSCR, dscr); - - if (old_thread->fscr != fscr) - mtspr(SPRN_FSCR, fscr); } if (cpu_has_feature(CPU_FTR_ARCH_207S)) { @@ -1053,6 +1046,9 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->ebbrr != new_thread->ebbrr) mtspr(SPRN_EBBRR, new_thread->ebbrr); + if (old_thread->fscr != new_thread->fscr) + mtspr(SPRN_FSCR, new_thread->fscr); + if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 11d15e7270e0..d2518c3cbf04 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1419,7 +1419,8 @@ void facility_unavailable_exception(struct pt_regs *regs) rd = (instword >> 21) & 0x1f; current->thread.dscr = regs->gpr[rd]; current->thread.dscr_inherit = 1; - mtspr(SPRN_FSCR, value | FSCR_DSCR); + current->thread.fscr |= FSCR_DSCR; + mtspr(SPRN_FSCR, current->thread.fscr); } /* Read from DSCR (mfspr RT, 0x03) */ -- cgit From bd3ea317fddfd0f2044f94bed294b90c4bc8e69e Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Thu, 9 Jun 2016 12:31:09 +1000 Subject: powerpc: Load Monitor Register Support This enables new registers, LMRR and LMSER, that can trigger an EBB in userspace code when a monitored load (via the new ldmx instruction) loads memory from a monitored space. This facility is controlled by a new FSCR bit, LM. This patch disables the FSCR LM control bit on task init and enables that bit when a load monitor facility unavailable exception is taken for using it. On context switch, this bit is then used to determine whether the two relevant registers are saved and restored. This is done lazily for performance reasons. 
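The new SPRs and FSCR bit are defined outside arch/powerpc/kernel and so don't appear in the diff below; the definitions look roughly like this (SPR numbers per ISA 3.0 — treat the exact values here as assumptions):

    /* arch/powerpc/include/asm/reg.h (sketch) */
    #define SPRN_LMRR    0x32D   /* Load Monitor Region Register */
    #define SPRN_LMSER   0x32E   /* Load Monitor Section Enable Register */
    #define FSCR_LM_LG   11      /* Load Monitor facility bit position */
    #define FSCR_LM      __MASK(FSCR_LM_LG)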
Signed-off-by: Jack Miller Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 18 ++++++++++++++++++ arch/powerpc/kernel/traps.c | 9 +++++++++ 2 files changed, 27 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6d0a831bc7d8..ddceeb96e8fb 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1017,6 +1017,14 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Conditionally save Load Monitor registers, if enabled */ + if (t->fscr & FSCR_LM) { + t->lmrr = mfspr(SPRN_LMRR); + t->lmser = mfspr(SPRN_LMSER); + } + } #endif } @@ -1052,6 +1060,16 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Conditionally restore Load Monitor registers, if enabled */ + if (new_thread->fscr & FSCR_LM) { + if (old_thread->lmrr != new_thread->lmrr) + mtspr(SPRN_LMRR, new_thread->lmrr); + if (old_thread->lmser != new_thread->lmser) + mtspr(SPRN_LMSER, new_thread->lmser); + } + } #endif } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d2518c3cbf04..f7e2f2e318bd 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", + [FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1434,6 +1435,14 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; + } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * This process has touched LM, so turn it on forever + * for this process + */ + current->thread.fscr |= FSCR_LM; + mtspr(SPRN_FSCR, current->thread.fscr); + return; } if ((status < ARRAY_SIZE(facility_strings)) && -- cgit From f8ab481066e7246e4b272233aa0b6948f5069f41 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 2 Jun 2016 08:45:14 -0300 Subject: powerpc: export cpu_to_core_id() Export cpu_to_core_id(). This will be used by the lpfc driver. This enables topology_core_id() from (defined to cpu_to_core_id() in arch/powerpc/include/asm/topology.h) to be used by (non-builtin) modules. That is arch-neutral, already used by eg, drivers/base/topology.c, but it is builtin (obj-y in Makefile) thus didn't need the export. Since the module uses topology_core_id() and this is defined to cpu_to_core_id(), it needs the export, otherwise: ERROR: "cpu_to_core_id" [drivers/scsi/lpfc/lpfc.ko] undefined! Tested on next-20160601. 
Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1b55c7864291..5a1f015ea9f3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -595,6 +595,7 @@ out: of_node_put(np); return id; } +EXPORT_SYMBOL_GPL(cpu_to_core_id); /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) -- cgit From c5fcb29a649723806a350dcb8854610f2f6b8819 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:26 +1000 Subject: powerpc/pci: Override pcibios_setup_bridge() This overrides pcibios_setup_bridge() that is called to update PCI bridge windows when PCI resource assignment is completed, to assign PE and setup various (resource) mapping for the PE in subsequent patches. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0f7a60f1e9f6..40df3a551b14 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -124,6 +124,14 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + + if (hose->controller_ops.setup_bridge) + hose->controller_ops.setup_bridge(bus, type); +} + void pcibios_reset_secondary_bus(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); -- cgit From 7415c14c560e7378b9cd3564c4c4f6b5e058e19d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:36 +1000 Subject: powerpc/pci: Update bridge windows on PCI plug On the PCI plugging event, PCI slot's subordinate devices are scanned and their (IO and MMIO) resources are assigned. Platform dependent resources (PE#, IO/MMIO/DMA windows) are allocated or created on updating windows of the slot's upstream bridge. This updates the windows of the hot plugged slot's upstream bridge in pcibios_finish_adding_to_bus() so that the platform resources (PE#, IO/MMIO/DMA segments) are allocated or created accordingly. 
Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 40df3a551b14..be9e51516ac0 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1444,8 +1444,12 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); - if (!pci_has_flag(PCI_PROBE_ONLY)) - pci_assign_unassigned_bus_resources(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (bus->self) + pci_assign_unassigned_bridge_resources(bus->self); + else + pci_assign_unassigned_bus_resources(bus); + } /* Fixup EEH */ eeh_add_device_tree_late(bus); -- cgit From 8cc7581cdb84a232468c41bc417183a423dfbb07 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:37 +1000 Subject: powerpc/pci: Delay populating pdn The pdn (struct pci_dn) instances are allocated from memblock or bootmem when creating PCI controller (hoses) in setup_arch(). PCI hotplug, which will be supported by proceeding patches, releases PCI device nodes and their corresponding pdn on unplugging event. The memory chunks for pdn instances allocated from memblock or bootmem are hard to reused after being released. This delays creating pdn by pci_devs_phb_init() from setup_arch() to core_initcall() so that they are allocated from slab. The memory consumed by pdn can be released to system without problem during PCI unplugging time. It indicates that pci_dn is unavailable in setup_arch() and the the fixup on pdn (like AGP's) can't be carried out that time. We have to do that in pcibios_root_bridge_prepare() on maple/pasemi/powermac platforms where/when the pdn is available. pcibios_root_bridge_prepare is called from subsys_initcall() which is executed after core_initcall() so the code flow does not change. At the mean while, the EEH device is created when pdn is populated, meaning pdn and EEH device have same life cycle. In turn, we needn't call eeh_dev_init() to create EEH device explicitly. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_dev.c | 17 +++-------------- arch/powerpc/kernel/pci_dn.c | 23 +++++++++++++++++++---- 2 files changed, 22 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 7815095fe3d8..d6b2ca70d14d 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -44,14 +44,13 @@ /** * eeh_dev_init - Create EEH device according to OF node * @pdn: PCI device node - * @data: PHB * * It will create EEH device according to the given OF node. The function * might be called by PCI emunation, DR, PHB hotplug. 
*/ -void *eeh_dev_init(struct pci_dn *pdn, void *data) +struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) { - struct pci_controller *phb = data; + struct pci_controller *phb = pdn->phb; struct eeh_dev *edev; /* Allocate EEH device */ @@ -69,7 +68,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) INIT_LIST_HEAD(&edev->list); INIT_LIST_HEAD(&edev->rmv_list); - return NULL; + return edev; } /** @@ -81,16 +80,8 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) */ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { - struct pci_dn *root = phb->pci_data; - /* EEH PE for PHB */ eeh_phb_pe_create(phb); - - /* EEH device for PHB */ - eeh_dev_init(root, phb); - - /* EEH devices for children OF nodes */ - traverse_pci_dn(root, eeh_dev_init, phb); } /** @@ -106,8 +97,6 @@ static int __init eeh_dev_phb_init(void) list_for_each_entry_safe(phb, tmp, &hose_list, list_node) eeh_dev_phb_init_dynamic(phb); - pr_info("EEH: devices created\n"); - return 0; } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index afeda26c2ebc..bfe60a1e70d9 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -212,8 +212,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_EEH /* Create the EEH device for the VF */ - eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); - edev = pdn_to_eeh_dev(pdn); + edev = eeh_dev_init(pdn); BUG_ON(!edev); edev->physfn = pdev; #endif /* CONFIG_EEH */ @@ -295,8 +294,11 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, const __be32 *regs; struct device_node *parent; struct pci_dn *pdn; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif - pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; dn->data = pdn; @@ -325,6 +327,15 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, /* Extended config space */ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); + /* Create EEH device */ +#ifdef CONFIG_EEH + edev = eeh_dev_init(pdn); + if (!edev) { + kfree(pdn); + return NULL; + } +#endif + /* Attach to parent node */ INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -510,15 +521,19 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) * pci device found underneath. This routine runs once, * early in the boot sequence. */ -void __init pci_devs_phb_init(void) +static int __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ list_for_each_entry_safe(phb, tmp, &hose_list, list_node) pci_devs_phb_init_dynamic(phb); + + return 0; } +core_initcall(pci_devs_phb_init); + static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; -- cgit From cdb1b3424dba7d38a2835f6f5f5aaeae74885410 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 22 Jun 2016 17:23:07 +1000 Subject: powerpc/pci: Reduce log level of PCI I/O space warning If a PHB has no I/O space, there's no need to make it look like something bad happened, a pr_debug() is plenty enough since this is the case of all our modern POWER chips. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be9e51516ac0..d1f91e1a813b 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1497,9 +1497,9 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, res = &hose->io_resource; if (!res->flags) { - pr_info("PCI: I/O resource not set for host" - " bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + pr_debug("PCI: I/O resource not set for host" + " bridge %s (domain %d)\n", + hose->dn->full_name, hose->global_number); } else { offset = pcibios_io_space_offset(hose); -- cgit From 4a03749f140cbee6fee66b674ba763942d1446f2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Jun 2016 12:07:41 +0100 Subject: powerpc/fadump: Trivial fix of spelling mistake, clean up message Fix trivial spelling mistake "rgistration". Also use pr_err() instead of printk() and unsplit the string to keep it all on one line. Signed-off-by: Colin Ian King [mpe: Keep rc on the same line, splitting it doesn't help] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fadump.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3cb3b02a13dd..f0664860753e 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1009,9 +1009,8 @@ static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) } while (wait_time); if (rc) { - printk(KERN_ERR "Failed to invalidate firmware-assisted dump " - "rgistration. unexpected error(%d).\n", rc); - return rc; + pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc); + return rc } fw_dump.dump_active = 0; fdm_active = NULL; -- cgit From b5b1cfc5d4d8457e98bbab0b8402c07b3938c3e6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Jul 2016 23:45:56 +1000 Subject: powerpc/fadump: Fix build error introduced by recent cleanup We spent so much time bike-shedding the printk() we missed that the next line was missing a semi-colon. And it seems none of our defconfigs turn on CONFIG_FA_DUMP. Fixes: 4a03749f140c ("powerpc/fadump: Trivial fix of spelling mistake, clean up message") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fadump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index f0664860753e..b3a663333d36 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1010,7 +1010,7 @@ static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) if (rc) { pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc); - return rc + return rc; } fw_dump.dump_active = 0; fdm_active = NULL; -- cgit From 393eb79ad32fedbdcbcd51bca38cf66291f6d528 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:06 +0530 Subject: powerpc/perf: factor out power8 __init_pmu code Factor out the power8 pmu init functions to share with power9. Monitor Mode Control Register S(MMCRS) and Monitor Mode Control Register H(MMCRH) registers are dropped in Power9. These registers are added to new function which are included for power8 init. 
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 584e119fa8b0..ec8a228df2f6 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -51,6 +51,7 @@ _GLOBAL(__setup_cpu_power8) mflr r11 bl __init_FSCR bl __init_PMU + bl __init_PMU_ISA207 bl __init_hvmode_206 mtlr r11 beqlr @@ -62,6 +63,7 @@ _GLOBAL(__setup_cpu_power8) bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV + bl __init_PMU_HV_ISA207 mtlr r11 blr @@ -69,6 +71,7 @@ _GLOBAL(__restore_cpu_power8) mflr r11 bl __init_FSCR bl __init_PMU + bl __init_PMU_ISA207 mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 @@ -81,12 +84,14 @@ _GLOBAL(__restore_cpu_power8) bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV + bl __init_PMU_HV_ISA207 mtlr r11 blr _GLOBAL(__setup_cpu_power9) mflr r11 bl __init_FSCR + bl __init_PMU bl __init_hvmode_206 mtlr r11 beqlr @@ -97,12 +102,14 @@ _GLOBAL(__setup_cpu_power9) bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 + bl __init_PMU_HV mtlr r11 blr _GLOBAL(__restore_cpu_power9) mflr r11 bl __init_FSCR + bl __init_PMU mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 @@ -114,6 +121,7 @@ _GLOBAL(__restore_cpu_power9) bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 + bl __init_PMU_HV mtlr r11 blr @@ -208,14 +216,22 @@ __init_tlb_power9: __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 + blr + +__init_PMU_HV_ISA207: + li r5,0 mtspr SPRN_MMCRH,r5 blr __init_PMU: li r5,0 - mtspr SPRN_MMCRS,r5 mtspr SPRN_MMCRA,r5 mtspr SPRN_MMCR0,r5 mtspr SPRN_MMCR1,r5 mtspr SPRN_MMCR2,r5 blr + +__init_PMU_ISA207: + li r5,0 + mtspr SPRN_MMCRS,r5 + blr -- cgit From ae26b36f8098c793a754549662771099215904ed Mon Sep 17 00:00:00 2001 From: Chris Smart Date: Fri, 17 Jun 2016 09:33:45 +1000 Subject: powerpc: Send SIGBUS on unaligned copy and paste Calling ISA 3.0 instructions copy, copy_first, paste and paste_last generates an alignment fault when copying or pasting unaligned data (128 byte). We catch this and send SIGBUS to the userspace process that caused it. We do not emulate these because paste may contain additional metadata when pasting to a co-processor and paste_last is the synchronisation point for preceding copy/paste sequences. Thanks to Michael Neuling for his help. Signed-off-by: Chris Smart Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/align.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index d7ad66bc5bdf..c7097f933114 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -873,6 +873,20 @@ int fix_alignment(struct pt_regs *regs) return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); } #endif + + /* + * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment + * check. + * + * Send a SIGBUS to the process that caused the fault. + * + * We do not emulate these because paste may contain additional metadata + * when pasting to a co-processor. Furthermore, paste_last is the + * synchronisation point for preceding copy/paste sequences. 
+ */ + if ((instruction & 0xfc0006fe) == PPC_INST_COPY) + return -EIO; + /* A size of 0 indicates an instruction we don't support, with * the exception of DCBZ which is handled as a special case here */ -- cgit From a9862c7440f191439a51f77233f89f7e40efe02e Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 18 Mar 2016 17:36:33 +1100 Subject: powerpc/rtas: Fix array overrun in ppc_rtas() syscall If ppc_rtas() is called with args.nargs == 16 and args.nret == 0, args.rets is set to point to &args.args[16], which is beyond the end of the args.args array. This results in a minor read overrun of the array when we check the first return code (which, per PAPR, is a required output of all RTAS calls) to see if there's been a hardware error. Change the nargs/nret check to ensure nargs is <= 15, allowing room for the status code. Users shouldn't be calling with nret == 0, but there's no real harm if they do, so we don't stop them. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 28736ff27fea..8da209fdf480 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1070,7 +1070,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) nret = be32_to_cpu(args.nret); token = be32_to_cpu(args.token); - if (nargs > ARRAY_SIZE(args.args) + if (nargs >= ARRAY_SIZE(args.args) || nret > ARRAY_SIZE(args.args) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; -- cgit From 799010244685334b34e674d354a1a71a3a6b6148 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 1 Jul 2016 16:20:39 +1000 Subject: powerpc/timer: Large Decrementer support Power ISAv3 adds a large decrementer (LD) mode which increases the size of the decrementer register. The size of the enlarged decrementer register is between 32 and 64 bits with the exact size being dependent on the implementation. When in LD mode, reads are sign extended to 64 bits and a decrementer exception is raised when the high bit is set (i.e the value goes below zero). Writes however are truncated to the physical register width so some care needs to be taken to ensure that the high bit is not set when reloading the decrementer. This patch adds support for using the LD inside the host kernel on processors that support it. When LD mode is supported firmware will supply the ibm,dec-bits property for CPU nodes to allow the kernel to determine the maximum decrementer value. Enabling LD mode is a hypervisor privileged operation so the kernel can only enable it manually when running in hypervisor mode. Guests that support LD mode can request it using the "ibm,client-architecture-support" firmware call (not implemented in this patch) or some other platform specific method. If this property is not supplied then the traditional decrementer width of 32 bit is assumed and LD mode will not be enabled. This patch was based on initial work by Jack Miller. 
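As a sketch of the arithmetic, the maximum programmable value for an
n-bit decrementer is the largest positive n-bit signed value:

	decrementer_max = (1ul << (bits - 1)) - 1;
	/*
	 * bits = 32 reproduces the traditional maximum 0x7FFFFFFF;
	 * a hypothetical bits = 56 gives 0x7FFFFFFFFFFFFF.
	 */

Anything larger would be truncated on write and could leave the high
bit set, immediately raising a decrementer exception.
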
Signed-off-by: Oliver O'Halloran Signed-off-by: Balbir Singh Acked-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/time.c | 67 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 3ed9a5a21d77..6b4d01d1ccf0 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -96,7 +96,8 @@ static struct clocksource clocksource_timebase = { .read = timebase_read, }; -#define DECREMENTER_MAX 0x7fffffff +#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF +u64 decrementer_max = DECREMENTER_DEFAULT_MAX; static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev); @@ -504,8 +505,8 @@ static void __timer_interrupt(void) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); + if (now <= decrementer_max) + set_dec(now); /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); @@ -535,7 +536,7 @@ void timer_interrupt(struct pt_regs * regs) /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. */ - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); /* Some implementations of hotplug will get timer interrupts while * offline, just ignore these and we also need to set @@ -583,9 +584,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. */ - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); local_irq_disable(); - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); } static void generic_suspend_enable_irqs(void) @@ -866,7 +867,7 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(DECREMENTER_MAX, dev); + decrementer_set_next_event(decrementer_max, dev); return 0; } @@ -892,6 +893,49 @@ static void register_decrementer_clockevent(int cpu) clockevents_register_device(dec); } +static void enable_large_decrementer(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + if (decrementer_max <= DECREMENTER_DEFAULT_MAX) + return; + + /* + * If we're running as the hypervisor we need to enable the LD manually + * otherwise firmware should have done it for us. 
+ */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD); +} + +static void __init set_decrementer_max(void) +{ + struct device_node *cpu; + u32 bits = 32; + + /* Prior to ISAv3 the decrementer is always 32 bit */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + cpu = of_find_node_by_type(NULL, "cpu"); + + if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) { + if (bits > 64 || bits < 32) { + pr_warn("time_init: firmware supplied invalid ibm,dec-bits"); + bits = 32; + } + + /* calculate the signed maximum given this many bits */ + decrementer_max = (1ul << (bits - 1)) - 1; + } + + of_node_put(cpu); + + pr_info("time_init: %u bit decrementer (max: %llx)\n", + bits, decrementer_max); +} + static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); @@ -899,7 +943,7 @@ static void __init init_decrementer_clockevent(void) clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); decrementer_clockevent.max_delta_ns = - clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); + clockevent_delta2ns(decrementer_max, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = clockevent_delta2ns(2, &decrementer_clockevent); @@ -908,6 +952,9 @@ static void __init init_decrementer_clockevent(void) void secondary_cpu_time_init(void) { + /* Enable and test the large decrementer for this cpu */ + enable_large_decrementer(); + /* Start the decrementer on CPUs that have manual control * such as BookE */ @@ -973,6 +1020,10 @@ void __init time_init(void) vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + /* initialise and enable the large decrementer (if we have one) */ + set_decrementer_max(); + enable_large_decrementer(); + /* Start the decrementer on CPUs that have manual control * such as BookE */ -- cgit From d468fcafb7a42f4e5a73219692dc4fd34b8440f3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Jul 2016 14:07:07 +1000 Subject: powerpc/pci: Fix build with PCI_IOV=y and EEH=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite attempting to fix this in commit fb36e9073693 ("powerpc/pci: Fix SRIOV not building without EEH enabled"), the build is still broken when PCI_IOV=y and EEH=n (eg. g5_defconfig with PCI_IOV=y): arch/powerpc/kernel/pci_dn.c: In function ‘remove_dev_pci_data’: arch/powerpc/kernel/pci_dn.c:230:18: error: unused variable ‘edev’ Incorporate Ben's idea of using __maybe_unused to avoid so many #ifdefs. 
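The idiom, in sketch form:

	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
		struct eeh_dev *edev __maybe_unused;
		...
	}

With CONFIG_EEH=n the variable is simply never referenced, and
__maybe_unused keeps the unused-variable warning quiet without
wrapping the declaration in #ifdef CONFIG_EEH.
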
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_dn.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index bfe60a1e70d9..592693437070 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * The function is used to find the firmware data of one @@ -181,9 +182,6 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; -#ifdef CONFIG_EEH - struct eeh_dev *edev; -#endif /* CONFIG_EEH */ int i; /* Only support IOV for now */ @@ -201,6 +199,8 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + struct eeh_dev *edev __maybe_unused; + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); @@ -227,7 +227,6 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; - struct eeh_dev *edev; int i; /* @@ -263,6 +262,8 @@ void remove_dev_pci_data(struct pci_dev *pdev) * a batch mode. */ for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + struct eeh_dev *edev __maybe_unused; + list_for_each_entry_safe(pdn, tmp, &parent->child_list, list) { if (pdn->busno != pci_iov_virtfn_bus(pdev, i) || -- cgit From 63a72284b159c569ec52f380c9a8dd9342d43bb8 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 29 Jun 2016 15:14:22 -0300 Subject: powerpc/pci: Assign fixed PHB number based on device-tree properties The domain/PHB field of PCI addresses has its value obtained from a global variable, incremented each time a new domain (represented by struct pci_controller) is added on the system. The domain addition process happens during boot or due to PHB hotplug add. As recent kernels are using predictable naming for network interfaces, the network stack is more tied to PCI naming. This can be a problem in hotplug scenarios, because PCI addresses will change if devices are removed and then re-added. This situation seems unusual, but it can happen if a user wants to replace a NIC without rebooting the machine, for example. This patch changes the way PCI domain values are generated: now, we use device-tree properties to assign fixed PHB numbers to PCI addresses when available (meaning pSeries and PowerNV cases). We also use a bitmap to allow dynamic PHB numbering when device-tree properties are not used. This bitmap keeps track of used PHB numbers and if a PHB is released (by hotplug operations for example), it allows the reuse of this PHB number, avoiding PCI address to change in case of device remove and re-add soon after. No functional changes were introduced. Signed-off-by: Guilherme G. Piccoli Reviewed-by: Gavin Shan Reviewed-by: Ian Munsie Acked-by: Gavin Shan [mpe: Drop unnecessary machine_is(pseries) test] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 54 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d1f91e1a813b..c6ac4f01dd56 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -41,11 +41,18 @@ #include #include +/* hose_spinlock protects accesses to the the phb_bitmap. 
*/ static DEFINE_SPINLOCK(hose_spinlock); LIST_HEAD(hose_list); -/* XXX kill that some day ... */ -static int global_phb_number; /* Global phb counter */ +/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */ +#define MAX_PHBS 0x10000 + +/* + * For dynamic PHB numbering: used/free PHBs tracking bitmap. + * Accesses to this bitmap should be protected by hose_spinlock. + */ +static DECLARE_BITMAP(phb_bitmap, MAX_PHBS); /* ISA Memory physical address */ resource_size_t isa_mem_base; @@ -64,6 +71,42 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); +/* + * This function should run under locking protection, specifically + * hose_spinlock. + */ +static int get_phb_number(struct device_node *dn) +{ + int ret, phb_id = -1; + u64 prop; + + /* + * Try fixed PHB numbering first, by checking archs and reading + * the respective device-tree properties. Firstly, try powernv by + * reading "ibm,opal-phbid", only present in OPAL environment. + */ + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) + ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop); + + if (!ret) + phb_id = (int)(prop & (MAX_PHBS - 1)); + + /* We need to be sure to not use the same PHB number twice. */ + if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) + return phb_id; + + /* + * If not pseries nor powernv, or if fixed PHB numbering tried to add + * the same PHB number twice, then fallback to dynamic PHB numbering. + */ + phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); + BUG_ON(phb_id >= MAX_PHBS); + set_bit(phb_id, phb_bitmap); + + return phb_id; +} + struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -72,7 +115,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) if (phb == NULL) return NULL; spin_lock(&hose_spinlock); - phb->global_number = global_phb_number++; + phb->global_number = get_phb_number(dev); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); phb->dn = dev; @@ -94,6 +137,11 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller); void pcibios_free_controller(struct pci_controller *phb) { spin_lock(&hose_spinlock); + + /* Clear bit of phb_bitmap to allow reuse of this PHB number. */ + if (phb->global_number < MAX_PHBS) + clear_bit(phb->global_number, phb_bitmap); + list_del(&phb->list_node); spin_unlock(&hose_spinlock); -- cgit From 8c6a0a1f4041f19559538649e0b9f3d9224b03a8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 15 Jun 2016 22:26:41 +0200 Subject: powerpc/pseries: start rtasd before PCI probing A strange behaviour is observed when comparing PCI hotplug in QEMU, between x86 and pseries. If you consider the following steps: - start a VM - add a PCI device via the QEMU monitor before the rtasd has started (for example starting the VM in paused state, or hotplug during FW or boot loader) - resume the VM execution The x86 kernel detects the PCI device, but the pseries one does not. This happens because the rtasd kernel worker is currently started under device_initcall, while PCI probing happens earlier under subsys_initcall. As a consequence, if we have a pending RTAS event at boot time, a message is printed and the event is dropped. This patch moves all the initialization of rtasd to arch_initcall, which is run before subsys_call: this way, logging_enabled is true when the RTAS event pops up and it is not lost anymore. The proc fs bits stay at device_initcall because they cannot be run before fs_initcall. 
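The resulting split is roughly:

	arch_initcall(rtas_event_scan_init);	/* logging enabled before PCI probing */
	__initcall(rtas_init);			/* proc entries; device_initcall level */

arch_initcall runs before subsys_initcall (where PCI probing happens),
which in turn runs before device_initcall, so an RTAS event raised
during firmware or boot-loader time is no longer dropped.
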
Signed-off-by: Greg Kurz Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtasd.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e864b7c5884e..a26a02006576 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -526,10 +526,8 @@ void rtas_cancel_event_scan(void) } EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); -static int __init rtas_init(void) +static int __init rtas_event_scan_init(void) { - struct proc_dir_entry *entry; - if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -562,13 +560,27 @@ static int __init rtas_init(void) return -ENOMEM; } + start_event_scan(); + + return 0; +} +arch_initcall(rtas_event_scan_init); + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!machine_is(pseries) && !machine_is(chrp)) + return 0; + + if (!rtas_log_buf) + return -ENODEV; + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, &proc_rtas_log_operations); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); - start_event_scan(); - return 0; } __initcall(rtas_init); -- cgit From 91dc068202a61741a458232de7de0627d6ac9952 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 24 Jun 2016 15:54:22 +1000 Subject: powerpc/eeh: Fix pr_debug()s in eeh_cache.c eeh_cache.c doesn't build cleanly with -DDEBUG when CONFIG_PHYS_ADDR_T_64BIT is set, as a couple of pr_debug()s use "%lx" for resource_size_t parameters. Use "%pap" instead, as it's the correct format specifier for types deriving from phys_addr_t. Signed-off-by: Andrew Donnellan Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index ddbcfab7efdf..d4cc26618809 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -114,9 +114,9 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", + pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n", (piar->flags & IORESOURCE_IO) ? 
"i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); cnt++; n = rb_next(n); } @@ -159,8 +159,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, piar->flags = flags; #ifdef DEBUG - pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name(dev)); + pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n", + &alo, &ahi, pci_name(dev)); #endif rb_link_node(&piar->rb_node, parent, p); -- cgit From fa2cff3f54cfec5c0b83afdb4f79975f5447a0b4 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 5 Jul 2016 16:12:34 +1000 Subject: powerpc: Fix typo in comment reference to CONFIG_TRACE_IRQFLAGS Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3cb46a3b1de7..58217aec30ea 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -250,7 +250,7 @@ notrace void arch_local_irq_restore(unsigned long en) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAG */ +#endif /* CONFIG_TRACE_IRQFLAGS */ set_soft_enabled(0); -- cgit From c223c90386bc2306510e0ceacd768a0123ff2a2f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 08:33:46 +0200 Subject: powerpc32: provide VIRT_CPU_ACCOUNTING This patch provides VIRT_CPU_ACCOUTING to PPC32 architecture. PPC32 doesn't have the PACA structure, so we use the task_info structure to store the accounting data. In order to reuse on PPC32 the PPC64 functions, all u64 data has been replaced by 'unsigned long' so that it is u32 on PPC32 and u64 on PPC64 Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/asm-offsets.c | 23 ++++++++-- arch/powerpc/kernel/entry_32.S | 17 ++++++++ arch/powerpc/kernel/entry_64.S | 6 +-- arch/powerpc/kernel/exceptions-64e.S | 4 +- arch/powerpc/kernel/time.c | 81 ++++++++++++++++++++++++------------ 5 files changed, 95 insertions(+), 36 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5b99f956e32f..047892869257 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -240,13 +240,28 @@ int main(void) DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); - DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); - DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); - DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); - DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); + DEFINE(ACCOUNT_STARTTIME, + offsetof(struct paca_struct, accounting.starttime)); + DEFINE(ACCOUNT_STARTTIME_USER, + offsetof(struct paca_struct, accounting.starttime_user)); + DEFINE(ACCOUNT_USER_TIME, + offsetof(struct paca_struct, accounting.user_time)); + DEFINE(ACCOUNT_SYSTEM_TIME, + offsetof(struct paca_struct, accounting.system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); +#else /* CONFIG_PPC64 */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + DEFINE(ACCOUNT_STARTTIME, + offsetof(struct 
thread_info, accounting.starttime)); + DEFINE(ACCOUNT_STARTTIME_USER, + offsetof(struct thread_info, accounting.starttime_user)); + DEFINE(ACCOUNT_USER_TIME, + offsetof(struct thread_info, accounting.user_time)); + DEFINE(ACCOUNT_SYSTEM_TIME, + offsetof(struct thread_info, accounting.system_time)); +#endif #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2405631e91a2..9899032230b4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -175,6 +175,12 @@ transfer_to_handler: addi r12,r12,-1 stw r12,4(r11) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + CURRENT_THREAD_INFO(r9, r1) + tophys(r9, r9) + ACCOUNT_CPU_USER_ENTRY(r9, r11, r12) +#endif + b 3f 2: /* if from kernel, check interrupted DOZE/NAP mode and @@ -398,6 +404,13 @@ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + andi. r4,r8,MSR_PR + beq 3f + CURRENT_THREAD_INFO(r4, r1) + ACCOUNT_CPU_USER_EXIT(r4, r5, r7) +3: +#endif lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -769,6 +782,10 @@ restore_user: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + CURRENT_THREAD_INFO(r9, r1) + ACCOUNT_CPU_USER_EXIT(r9, r10, r11) +#endif b restore diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2e0c565754aa..fcb2887f5a33 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -72,7 +72,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r0,GPR0(r1) std r10,GPR1(r1) beq 2f /* if from kernel mode */ - ACCOUNT_CPU_USER_ENTRY(r10, r11) + ACCOUNT_CPU_USER_ENTRY(r13, r10, r11) 2: std r2,GPR2(r1) std r3,GPR3(r1) mfcr r2 @@ -246,7 +246,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r4,_LINK(r1) beq- 1f - ACCOUNT_CPU_USER_EXIT(r11, r12) + ACCOUNT_CPU_USER_EXIT(r13, r11, r12) BEGIN_FTR_SECTION HMT_MEDIUM_LOW @@ -859,7 +859,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) BEGIN_FTR_SECTION mtspr SPRN_PPR,r2 /* Restore PPR */ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ACCOUNT_CPU_USER_EXIT(r2, r4) + ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) 1: mtspr SPRN_SRR1,r3 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2d3b40fd9bac..38a1f96430e1 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -386,7 +386,7 @@ exc_##n##_common: \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ + ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ @@ -1059,7 +1059,7 @@ fast_exception_return: andi. r6,r10,MSR_PR REST_2GPRS(6, r1) beq 1f - ACCOUNT_CPU_USER_EXIT(r10, r11) + ACCOUNT_CPU_USER_EXIT(r13, r10, r11) ld r0,GPR13(r1) 1: stdcx. 
r0,0,r1 /* to clear the reservation */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6b4d01d1ccf0..4e7759c8ca30 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -167,7 +167,15 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +#ifdef CONFIG_PPC_SPLPAR void (*dtl_consumer)(struct dtl_entry *, u64); +#endif + +#ifdef CONFIG_PPC64 +#define get_accounting(tsk) (&get_paca()->accounting) +#else +#define get_accounting(tsk) (&task_thread_info(tsk)->accounting) +#endif static void calc_cputime_factors(void) { @@ -187,7 +195,7 @@ static void calc_cputime_factors(void) * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. */ -static u64 read_spurr(u64 tb) +static unsigned long read_spurr(unsigned long tb) { if (cpu_has_feature(CPU_FTR_SPURR)) return mfspr(SPRN_SPURR); @@ -250,8 +258,8 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - u8 save_soft_enabled = local_paca->soft_enabled; + struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before * soft/hard_enabled are sync'ed to the expected state @@ -261,10 +269,10 @@ void accumulate_stolen_time(void) */ local_paca->soft_enabled = 0; - sst = scan_dispatch_log(local_paca->starttime_user); - ust = scan_dispatch_log(local_paca->starttime); - local_paca->system_time -= sst; - local_paca->user_time -= ust; + sst = scan_dispatch_log(acct->starttime_user); + ust = scan_dispatch_log(acct->starttime); + acct->system_time -= sst; + acct->user_time -= ust; local_paca->stolen_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; @@ -276,7 +284,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb) if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { stolen = scan_dispatch_log(stop_tb); - get_paca()->system_time -= stolen; + get_paca()->accounting.system_time -= stolen; } stolen += get_paca()->stolen_time; @@ -296,27 +304,29 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. 
*/ -static u64 vtime_delta(struct task_struct *tsk, - u64 *sys_scaled, u64 *stolen) +static unsigned long vtime_delta(struct task_struct *tsk, + unsigned long *sys_scaled, + unsigned long *stolen) { - u64 now, nowscaled, deltascaled; - u64 udelta, delta, user_scaled; + unsigned long now, nowscaled, deltascaled; + unsigned long udelta, delta, user_scaled; + struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - get_paca()->system_time += now - get_paca()->starttime; - get_paca()->starttime = now; - deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startspurr = nowscaled; + acct->system_time += now - acct->starttime; + acct->starttime = now; + deltascaled = nowscaled - acct->startspurr; + acct->startspurr = nowscaled; *stolen = calculate_stolen_time(now); - delta = get_paca()->system_time; - get_paca()->system_time = 0; - udelta = get_paca()->user_time - get_paca()->utime_sspurr; - get_paca()->utime_sspurr = get_paca()->user_time; + delta = acct->system_time; + acct->system_time = 0; + udelta = acct->user_time - acct->utime_sspurr; + acct->utime_sspurr = acct->user_time; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -338,14 +348,14 @@ static u64 vtime_delta(struct task_struct *tsk, *sys_scaled = deltascaled; } } - get_paca()->user_time_scaled += user_scaled; + acct->user_time_scaled += user_scaled; return delta; } void vtime_account_system(struct task_struct *tsk) { - u64 delta, sys_scaled, stolen; + unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); account_system_time(tsk, 0, delta, sys_scaled); @@ -356,7 +366,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - u64 delta, sys_scaled, stolen; + unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); account_idle_time(delta + stolen); @@ -374,15 +384,32 @@ void vtime_account_idle(struct task_struct *tsk) void vtime_account_user(struct task_struct *tsk) { cputime_t utime, utimescaled; + struct cpu_accounting_data *acct = get_accounting(tsk); - utime = get_paca()->user_time; - utimescaled = get_paca()->user_time_scaled; - get_paca()->user_time = 0; - get_paca()->user_time_scaled = 0; - get_paca()->utime_sspurr = 0; + utime = acct->user_time; + utimescaled = acct->user_time_scaled; + acct->user_time = 0; + acct->user_time_scaled = 0; + acct->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } +#ifdef CONFIG_PPC32 +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void arch_vtime_task_switch(struct task_struct *prev) +{ + struct cpu_accounting_data *acct = get_accounting(current); + + acct->starttime = get_accounting(prev)->starttime; + acct->system_time = 0; + acct->user_time = 0; +} +#endif /* CONFIG_PPC32 */ + #else /* ! 
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif -- cgit From f86ef74ed9193c52411277eeac2eec69af553392 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:43 +0200 Subject: powerpc/8xx: Fix vaddr for IMMR early remap Memory: 124428K/131072K available (3748K kernel code, 188K rwdata, 648K rodata, 508K init, 290K bss, 6644K reserved) Kernel virtual memory layout: * 0xfffdf000..0xfffff000 : fixmap * 0xfde00000..0xfe000000 : consistent mem * 0xfddf6000..0xfde00000 : early ioremap * 0xc9000000..0xfddf6000 : vmalloc & ioremap SLUB: HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 Today, IMMR is mapped 1:1 at startup Mapping IMMR 1:1 is just wrong because it may overlap with another area. On most mpc8xx boards it is OK as IMMR is set to 0xff000000 but for instance on EP88xC board, IMMR is at 0xfa200000 which overlaps with VM ioremap area This patch fixes the virtual address for remapping IMMR with the fixmap regardless of the value of IMMR. The size of IMMR area is 256kbytes (CPM at offset 0, security engine at offset 128k) so a 512k page is enough Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/asm-offsets.c | 8 ++++++++ arch/powerpc/kernel/head_8xx.S | 11 ++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 047892869257..247f6407c7d8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -68,6 +68,10 @@ #include "../mm/mmu_decl.h" #endif +#ifdef CONFIG_PPC_8xx +#include +#endif + int main(void) { DEFINE(THREAD, offsetof(struct task_struct, thread)); @@ -749,5 +753,9 @@ int main(void) DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); +#ifdef CONFIG_PPC_8xx + DEFINE(VIRT_IMMR_BASE, __fix_to_virt(FIX_IMMR_BASE)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 80c69472314e..378a1858687d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -30,6 +30,7 @@ #include #include #include +#include /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 @@ -763,7 +764,7 @@ start_here: * virtual to physical. Also, set the cache mode since that is defined * by TLB entries and perform any additional mapping (like of the IMMR). * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel, - * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by + * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. */ initial_mmu: @@ -812,7 +813,7 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map another 8 MByte at the IMMR to get the processor + /* Map a 512k page for the IMMR to get the processor * internal registers (among other things). */ #ifdef CONFIG_PIN_TLB @@ -820,12 +821,12 @@ initial_mmu: mtspr SPRN_MD_CTR, r10 #endif mfspr r9, 638 /* Get current IMMR */ - andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */ + andis. 
r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - mr r8, r9 /* Create vaddr for TLB */ + lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 - li r8, MD_PS8MEG /* Set 8M byte page */ + li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ ori r8, r8, MD_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ -- cgit From 4badd43ae44109c88438cc6421d208f513cf537f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:45 +0200 Subject: powerpc/8xx: Map IMMR area with 512k page at a fixed address Once the linear memory space has been mapped with 8Mb pages, as seen in the related commit, we get 11 millions DTLB missed during the reference 600s period. 77% of the misses are on user addresses and 23% are on kernel addresses (1 fourth for linear address space and 3 fourth for virtual address space) Traditionaly, each driver manages one computer board which has its own components with its own memory maps. But on embedded chips like the MPC8xx, the SOC has all registers located in the same IO area. When looking at ioremaps done during startup, we see that many drivers are re-mapping small parts of the IMMR for their own use and all those small pieces gets their own 4k page, amplifying the number of TLB misses: in our system we get 0xff000000 mapped 31 times and 0xff003000 mapped 9 times. Even if each part of IMMR was mapped only once with 4k pages, it would still be several small mappings towards linear area. This patch maps the IMMR with a single 512k page. With this patch applied, the number of DTLB misses during the 10 min period is reduced to 11.8 millions for a duration of 5.8s, which represents 2% of the non-idle time hence yet another 10% reduction. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 378a1858687d..44f4edbd5dee 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -384,6 +384,27 @@ InstructionTLBMiss: EXCEPTION_EPILOG_0 rfi +/* + * Bottom part of DataStoreTLBMiss handler for IMMR area + * not enough space in the DataStoreTLBMiss area + */ +DTLBMissIMMR: + mtcr r3 + /* Set 512k byte guarded page and mark it valid */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r10, r3) + mfspr r10, SPRN_IMMR /* Get current IMMR */ + rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + _PAGE_PRESENT | _PAGE_NO_CACHE + MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + + li r11, RPN_PATTERN + mfspr r3, SPRN_SPRG_SCRATCH2 + mtspr SPRN_DAR, r11 /* Tag DAR */ + EXCEPTION_EPILOG_0 + rfi + . 
= 0x1200 DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 @@ -397,6 +418,14 @@ DataStoreTLBMiss: IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ BRANCH_UNLESS_KERNEL(3f) + + rlwinm r11, r10, 16, 0xfff8 +#ifndef CONFIG_PIN_TLB + cmpli cr0, r11, VIRT_IMMR_BASE@h +_ENTRY(DTLBMiss_jmp) + beq- DTLBMissIMMR +#endif + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: -- cgit From 6264dbb98ff762d71c65e04ae3b2e632d28a5b84 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:49 +0200 Subject: powerpc/8xx: unpin all TLBs before flushing Bootloader may have pinned some TLB entries so the kernel must unpin them before flushing TLBs with tlbia otherwise pinned TLB entries won't get flushed Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 44f4edbd5dee..d9a165629202 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -797,6 +797,14 @@ start_here: * these mappings is mapped by page tables. */ initial_mmu: + li r8, 0 + mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ + lis r10, MD_RESETVAL@h +#ifndef CONFIG_8xx_COPYBACK + oris r10, r10, MD_WTDEF@h +#endif + mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ + tlbia /* Invalidate all TLB entries */ /* Always pin the first 8 MB ITLB to prevent ITLB misses while mucking around with SRR0/SRR1 in asm @@ -807,16 +815,10 @@ initial_mmu: mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ #ifdef CONFIG_PIN_TLB - lis r10, (MD_RSV4I | MD_RESETVAL)@h + oris r10, r10, MD_RSV4I@h ori r10, r10, 0x1c00 - mr r8, r10 -#else - lis r10, MD_RESETVAL@h -#endif -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ +#endif /* Now map the lower 8 Meg into the TLBs. For this quick hack, * we can load the instruction and data TLB registers with the -- cgit From bb7f380849f8c8722ea383ec5867a79d365d4574 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:51 +0200 Subject: powerpc/8xx: Don't use page table for linear memory space Instead of using the first level page table to define mappings for the linear memory space, we can use direct mapping from the TLB handling routines. This has several advantages: * No need to read the tables at each TLB miss * No issue in 16k pages mode where the 1st level table maps 64 Mbytes The size of the available linear space is known at system startup. 
In order to avoid data access at each TLB miss to know the memory size, the TLB routine is patched at startup with the proper size This patch provides a 10%-15% improvment of TLB miss handling for kernel addresses Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 71 ++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 34 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d9a165629202..3de7d02c36ce 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -389,52 +389,52 @@ InstructionTLBMiss: * not enough space in the DataStoreTLBMiss area */ DTLBMissIMMR: - mtcr r3 + mtcr r10 /* Set 512k byte guarded page and mark it valid */ li r10, MD_PS512K | MD_GUARDED | MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r10, r3) + MTSPR_CPU6(SPRN_MD_TWC, r10, r11) mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ li r11, RPN_PATTERN - mfspr r3, SPRN_SPRG_SCRATCH2 mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi . = 0x1200 DataStoreTLBMiss: - mtspr SPRN_SPRG_SCRATCH2, r3 EXCEPTION_PROLOG_0 - mfcr r3 + mfcr r10 /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - mfspr r10, SPRN_MD_EPN - IS_KERNEL(r11, r10) - mfspr r11, SPRN_M_TW /* Get level 1 table */ - BRANCH_UNLESS_KERNEL(3f) - - rlwinm r11, r10, 16, 0xfff8 + mfspr r11, SPRN_MD_EPN + rlwinm r11, r11, 16, 0xfff8 #ifndef CONFIG_PIN_TLB cmpli cr0, r11, VIRT_IMMR_BASE@h +#endif + cmpli cr7, r11, PAGE_OFFSET@h +#ifndef CONFIG_PIN_TLB _ENTRY(DTLBMiss_jmp) beq- DTLBMissIMMR #endif + bge- cr7, 4f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha + mfspr r11, SPRN_M_TW /* Get level 1 table */ 3: + mtcr r10 +#ifdef CONFIG_8xx_CPU6 + mtspr SPRN_SPRG_SCRATCH2, r3 +#endif + mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - mtcr r11 - bt- 28,DTLBMiss8M /* bit 28 = Large page (8M) */ - mtcr r3 /* We have a pte table, so load fetch the pte from the table. */ @@ -482,29 +482,30 @@ _ENTRY(DTLBMiss_jmp) MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ +#ifdef CONFIG_8xx_CPU6 mfspr r3, SPRN_SPRG_SCRATCH2 +#endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi -DTLBMiss8M: - mtcr r3 - ori r11, r11, MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) -#ifdef CONFIG_PPC_16K_PAGES - /* - * In 16k pages mode, each PGD entry defines a 64M block. - * Here we select the 8M page within the block. 
- */ - rlwimi r11, r10, 0, 0x03800000 -#endif - rlwinm r10, r11, 0, 0xff800000 +4: +_ENTRY(DTLBMiss_cmp) + cmpli cr0, r11, PAGE_OFFSET@h + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha + bge- 3b + + mtcr r10 + /* Set 8M byte page and mark it valid */ + li r10, MD_PS8MEG | MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r10, r11) + mfspr r10, SPRN_MD_EPN + rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ li r11, RPN_PATTERN - mfspr r3, SPRN_SPRG_SCRATCH2 mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi @@ -583,12 +584,14 @@ FixupDAR:/* Entry point for dcbx workaround. */ IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ BRANCH_UNLESS_KERNEL(3f) + rlwinm r11, r10, 16, 0xfff8 +_ENTRY(FixupDAR_cmp) + cmpli cr7, r11, PAGE_OFFSET@h + blt- cr7, 200f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - mtcr r11 - bt 28,200f /* bit 28 = Large page (8M) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -614,8 +617,8 @@ FixupDAR:/* Entry point for dcbx workaround. */ 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ - /* concat physical page address(r11) and page offset(r10) */ -200: rlwimi r11, r10, 0, 32 - (PAGE_SHIFT << 1), 31 + /* create physical page address from effective address */ +200: tophys(r11, r10) b 201b 144: mfspr r10, SPRN_DSISR -- cgit From 4ad274502f66614eec3093aaa0cdeb4b70697ddf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:54 +0200 Subject: powerpc/8xx: Rework CONFIG_PIN_TLB handling On recent kernels, with some debug options like for instance CONFIG_LOCKDEP, the BSS requires more than 8M memory, allthough the kernel code fits in the first 8M. Today, it is necessary to activate CONFIG_PIN_TLB to get more than 8M at startup, allthough pinning TLB is not necessary for that. We could have inconditionaly mapped 16 or 24M bytes at startup but some old hardware only have 8M and mapping non-existing RAM would be an issue due to speculative accesses. With the preceding patch however, the TLB entries are populated on demand. By setting up the TLB miss handler to handle up to 24M until the handler is patched for the entire memory space, it is possible to allow access up to more memory without mapping non-existing RAM. It is therefore not needed anymore to map memory data at all at startup. It will be handled by the TLB miss handler. One might still want to PIN the IMMR and the first 24M of RAM. It is now possible to do it in the C memory initialisation functions. In addition, we now know how much memory we have when we do it, so we are able to adapt the pining to the real amount of memory available. So boards with less than 24M can now also benefit from PIN_TLB. 
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 44 ++++-------------------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3de7d02c36ce..00cc9df7d322 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -491,7 +491,7 @@ _ENTRY(DTLBMiss_jmp) 4: _ENTRY(DTLBMiss_cmp) - cmpli cr0, r11, PAGE_OFFSET@h + cmpli cr0, r11, (PAGE_OFFSET + 0x1800000)@h lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha bge- 3b @@ -586,7 +586,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ BRANCH_UNLESS_KERNEL(3f) rlwinm r11, r10, 16, 0xfff8 _ENTRY(FixupDAR_cmp) - cmpli cr7, r11, PAGE_OFFSET@h + cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h blt- cr7, 200f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ @@ -823,23 +823,16 @@ initial_mmu: mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ #endif - /* Now map the lower 8 Meg into the TLBs. For this quick hack, - * we can load the instruction and data TLB registers with the - * same values. - */ + /* Now map the lower 8 Meg into the ITLB. */ lis r8, KERNELBASE@h /* Create vaddr for TLB */ ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 - mtspr SPRN_MD_EPN, r8 li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 - li r8, MI_PS8MEG /* Set 8M byte page, APG 0 */ - ori r8, r8, MI_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ - mtspr SPRN_MD_RPN, r8 + lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 @@ -851,9 +844,6 @@ initial_mmu: * internal registers (among other things). */ #ifdef CONFIG_PIN_TLB - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 -#endif mfspr r9, 638 /* Get current IMMR */ andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */ @@ -866,32 +856,6 @@ initial_mmu: mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ mtspr SPRN_MD_RPN, r8 - -#ifdef CONFIG_PIN_TLB - /* Map two more 8M kernel data pages. - */ - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 - - lis r8, KERNELBASE@h /* Create vaddr for TLB */ - addis r8, r8, 0x0080 /* Add 8M */ - ori r8, r8, MI_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r9, MI_PS8MEG /* Set 8M byte page */ - ori r9, r9, MI_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r9 - li r11, MI_BOOTINIT /* Create RPN for address 0 */ - addis r11, r11, 0x0080 /* Add 8M */ - mtspr SPRN_MD_RPN, r11 - - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 - - addis r8, r8, 0x0080 /* Add 8M */ - mtspr SPRN_MD_EPN, r8 - mtspr SPRN_MD_TWC, r9 - addis r11, r11, 0x0080 /* Add 8M */ - mtspr SPRN_MD_RPN, r11 #endif /* Since the cache is enabled according to the information we -- cgit From 62f64b49d04dc70687cd713c804fecd80216b2d6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:56 +0200 Subject: powerpc/8xx: add CONFIG_PIN_TLB_IMMR CONFIG_PIN_TLB maps IMMR area and the first 24 Mbytes of memory. In some circunstances it might be more interesting to not map IMMR but map 32 Mbytes of memory instead. 
Therefore we add config option CONFIG_PIN_TLB_IMMR to select if IMMR shall be pinned or not, hence whether we pin 24 or 32 Mbytes of RAM Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 00cc9df7d322..43ddaae42baf 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -414,11 +414,11 @@ DataStoreTLBMiss: */ mfspr r11, SPRN_MD_EPN rlwinm r11, r11, 16, 0xfff8 -#ifndef CONFIG_PIN_TLB +#ifndef CONFIG_PIN_TLB_IMMR cmpli cr0, r11, VIRT_IMMR_BASE@h #endif cmpli cr7, r11, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB +#ifndef CONFIG_PIN_TLB_IMMR _ENTRY(DTLBMiss_jmp) beq- DTLBMissIMMR #endif @@ -819,7 +819,6 @@ initial_mmu: #ifdef CONFIG_PIN_TLB oris r10, r10, MD_RSV4I@h - ori r10, r10, 0x1c00 mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ #endif @@ -843,7 +842,10 @@ initial_mmu: /* Map a 512k page for the IMMR to get the processor * internal registers (among other things). */ -#ifdef CONFIG_PIN_TLB +#ifdef CONFIG_PIN_TLB_IMMR + ori r10, r10, 0x1c00 + mtspr SPRN_MD_CTR, r10 + mfspr r9, 638 /* Get current IMMR */ andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */ -- cgit From 9f595fd8b54809fed13fc30906ef1e90a3fcfbc9 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Sat, 9 Jul 2016 03:22:39 -0500 Subject: powerpc/8xx: Force VIRT_IMMR_BASE to be a positive number The asm-offsets mechanism generates signed numbers, even if the input value is explicitly unsigned. This causes a problem with older binutils (e.g. 2.23), which sign-extend a negative number when @h is applied. Thus, this instruction: cmpli cr0, r11, VIRT_IMMR_BASE@h resulted in this: Error: operand out of range (0xfffffff0 is not between 0x00000000 and 0x0000ffff) By casting to a larger type, we can force the output to be expressed as a positive number. Signed-off-by: Scott Wood Cc: Christophe Leroy --- arch/powerpc/kernel/asm-offsets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 247f6407c7d8..b89d14c0352c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -754,7 +754,7 @@ int main(void) DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); #ifdef CONFIG_PPC_8xx - DEFINE(VIRT_IMMR_BASE, __fix_to_virt(FIX_IMMR_BASE)); + DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); #endif return 0; -- cgit From bd7c93cca36911baf2eb2bc386956612af3b842d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:45 +1000 Subject: powerpc: Update obsolete comments in setup_32.c about entry conditions early_init() is called in-place before kernel relocation and using whatever MMU setup exists at the point the kernel is entered. machine_init() is called after relocation and after some initial mapping of PAGE_OFFSET has been established (typically using BATs on 6xx/7xx/7xxx processors or some form of bolted TLB on others). 
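A rough mental model of the 32-bit entry flow (function names as in
setup_32.c, not literal call sites):

	early_init(dt_ptr);	/* pre-relocation: access globals via PTRRELOC */
	machine_init(dt_ptr);	/* post-relocation: initial PAGE_OFFSET mapping
				 * (BATs on 6xx/7xx/7xxx, bolted TLB elsewhere) */
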
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d544fa311757..2fc27ace80f9 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -62,9 +62,7 @@ int icache_bsize; int ucache_bsize; /* - * We're called here very early in the boot. We determine the machine - * type and call the appropriate low-level setup functions. - * -- Cort + * We're called here very early in the boot. * * Note that the kernel may be running at an address which is different * from the address that it was linked at, so we must use RELOC/PTRRELOC @@ -105,6 +103,10 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) /* + * This is run before start_kernel(), the kernel has been relocated + * and we are running with enough of the MMU enabled to have our + * proper kernel virtual addresses + * * Find out what kind of machine we're on and save any data we need * from the early boot process (devtree is copied on pmac by prom_init()). * This is called very early on the boot process, after a minimal -- cgit From 63c254a501049f70c53aea602525c6912362079e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:46 +1000 Subject: powerpc: Add comment explaining the purpose of setup_kdump_trampoline() Anything in early_setup() needs to be justified to be there, in this case, we need the trampolines before we can take exceptions and thus before we turn on the MMU. Also remove a pretty meaningless and misplaced debug message Signed-off-by: Benjamin Herrenschmidt [mpe: Fix comment formatting] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5530bb55a78b..98f72c6d0ebc 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -279,10 +279,12 @@ void __init early_setup(unsigned long dt_ptr) /* Probe the machine type */ probe_machine(); + /* + * Setup the trampolines from the lowmem exception vectors + * to the kdump kernel when not using a relocatable kernel. + */ setup_kdump_trampoline(); - DBG("Found, Initializing memory management...\n"); - /* Initialize the hash table or TLB handling */ early_init_mmu(); -- cgit From da6a97bf12d57e341029b3624ed112175ecff514 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:44 +1000 Subject: powerpc: Move epapr_paravirt_early_init() to early_init_devtree() The function is called by both 32-bit and 64-bit early setup right after early_init_devtree(). All it does is run yet another early DT parser which is precisely what early_init_devtree() is about, so move it in there. 
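The call simply moves to the tail of the device-tree scan, in sketch
form (details elided, see the diff below):

	void __init early_init_devtree(void *params)
	{
		...
		epapr_paravirt_early_init();
	}
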
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 2 ++ arch/powerpc/kernel/setup_32.c | 3 --- arch/powerpc/kernel/setup_64.c | 3 --- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 946e34ffeae9..48434be99a07 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -739,6 +740,7 @@ void __init early_init_devtree(void *params) /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); #endif + epapr_paravirt_early_init(); DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 2fc27ace80f9..4abefb525462 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #define DBG(fmt...) @@ -125,8 +124,6 @@ notrace void __init machine_init(u64 dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - early_init_mmu(); probe_machine(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 98f72c6d0ebc..521846c904ca 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #ifdef DEBUG @@ -270,8 +269,6 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - /* Now we know the logical id of our boot cpu, setup the paca. */ setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); -- cgit From 484cc1ed3c6b90459f02977f6f5ab7810db18705 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:54 +1000 Subject: powerpc/rtas: Don't test for machine type in rtas_initialize() The test is unnecessary, the FW_FEATURE_LPAR is sufficient as there exist no other LPAR type that has RTAS. 
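That is, the guard reduces from

	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR))

to the firmware feature test alone.
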
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8da209fdf480..286354f00ff6 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1174,7 +1174,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } -- cgit From 0f2b3442fb850626d50a9d7e533c9f859ef15e6a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:55 +1000 Subject: powerpc: Don't test for machine type in smp_setup_cpu_maps() The subsequent test for RTAS, along with the LPAR test, is sufficient. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 8ca79b7503d8..2a3564caafd3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -494,7 +494,7 @@ void __init smp_setup_cpu_maps(void) * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && + if (firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; const __be32 *ireg; -- cgit From a7d6392866e9777cb287ad194ce8eca00737066f Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Mon, 11 Jul 2016 14:17:31 +1000 Subject: powerpc/crash: Rearrange loop condition to avoid out of bounds array access The array crash_shutdown_handles[] has size CRASH_HANDLER_MAX, thus when we loop over its elements we check crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX. However, this means that when we increment i to CRASH_HANDLER_MAX we will perform an out-of-bounds array access, checking the first condition before exiting on the second condition. To avoid the out-of-bounds access, simply reorder the loop conditions.
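The fix relies on C's left-to-right short-circuit evaluation of &&: once the bounds test fails, the array element is never read. A minimal standalone illustration (the array and function names here are simplified stand-ins, not the kernel code):

	#define CRASH_HANDLER_MAX 3

	typedef void (*crash_handler_t)(void);

	static crash_handler_t handles[CRASH_HANDLER_MAX];

	static void run_handlers(void)
	{
		int i;

		/*
		 * Bounds check first: when i reaches CRASH_HANDLER_MAX the
		 * && short-circuits and handles[i] is never evaluated, so we
		 * never read one element past the end of the array.
		 */
		for (i = 0; i < CRASH_HANDLER_MAX && handles[i]; i++)
			handles[i]();
	}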
Fixes: 1d1451655bad ("powerpc: Add array bounds checking to crash_shutdown_handlers") Signed-off-by: Suraj Jitindar Singh Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 888bdf198c3e..47b63de81f9b 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -351,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; crash_shutdown_cpu = smp_processor_id(); - for (i = 0; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) { + for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* * Insert syncs and delay to ensure -- cgit From 95ec77c06e8e63fff50c497eca0668bf6da39813 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 12 Jul 2016 10:54:52 +1000 Subject: powerpc: Make ppc_md.{halt, restart} __noreturn powernv marks its halt and restart calls as __noreturn. However, ppc_md does not have this annotation. Add the annotation to ppc_md, and then to every halt/restart function that is missing it. Additionally, I have verified that all of these functions do not return. Occasionally I have added a spin loop to be sure. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 286354f00ff6..6a3e5de544ce 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -685,7 +685,7 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } -void rtas_restart(char *cmd) +void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); @@ -704,7 +704,7 @@ void rtas_power_off(void) for (;;); } -void rtas_halt(void) +void __noreturn rtas_halt(void) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); -- cgit From 6bcb80143e792becfd2b9cc6a339ce523e4e2219 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 6 Jul 2016 14:58:06 +1000 Subject: powerpc/tm: Fix stack pointer corruption in __tm_recheckpoint() At the start of __tm_recheckpoint() we save the kernel stack pointer (r1) in SPRG SCRATCH0 (SPRG2) so that we can restore it after the trecheckpoint. Unfortunately, the same SPRG is used in the SLB miss handler. If an SLB miss is taken between the save and restore of r1 to the SPRG, the SPRG is changed and hence r1 is also corrupted.
We can end up with the following crash when we start using r1 again after the restore from the SPRG: Oops: Bad kernel stack pointer, sig: 6 [#1] SMP NR_CPUS=2048 NUMA pSeries CPU: 658 PID: 143777 Comm: htm_demo Tainted: G EL X 4.4.13-0-default #1 task: c0000b56993a7810 ti: c00000000cfec000 task.ti: c0000b56993bc000 NIP: c00000000004f188 LR: 00000000100040b8 CTR: 0000000010002570 REGS: c00000000cfefd40 TRAP: 0300 Tainted: G EL X (4.4.13-0-default) MSR: 8000000300001033 CR: 02000424 XER: 20000000 CFAR: c000000000008468 DAR: 00003ffd84e66880 DSISR: 40000000 SOFTE: 0 PACATMSCRATCH: 00003ffbc865e680 GPR00: fffffffcfabc4268 00003ffd84e667a0 00000000100d8c38 000000030544bb80 GPR04: 0000000000000002 00000000100cf200 0000000000000449 00000000100cf100 GPR08: 000000000000c350 0000000000002569 0000000000002569 00000000100d6c30 GPR12: 00000000100d6c28 c00000000e6a6b00 00003ffd84660000 0000000000000000 GPR16: 0000000000000003 0000000000000449 0000000010002570 0000010009684f20 GPR20: 0000000000800000 00003ffd84e5f110 00003ffd84e5f7a0 00000000100d0f40 GPR24: 0000000000000000 0000000000000000 0000000000000000 00003ffff0673f50 GPR28: 00003ffd84e5e960 00000000003d0f00 00003ffd84e667a0 00003ffd84e5e680 NIP [c00000000004f188] restore_gprs+0x110/0x17c LR [00000000100040b8] 0x100040b8 Call Trace: Instruction dump: f8a1fff0 e8e700a8 38a00000 7ca10164 e8a1fff8 e821fff0 7c0007dd 7c421378 7db142a6 7c3242a6 38800002 7c810164 e9e100e8 ea0100f0 ea2100f8 We hit this on large memory machines (> 2TB) but it can also be hit on smaller machines when 1TB segments are disabled. To hit this, you also need to be virtualised to ensure SLBs are periodically removed by the hypervisor. This patch moves the saving of r1 to the SPRG to the region where we are guaranteed not to take any further SLB misses. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Michael Neuling Acked-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tm.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index b7019b559ddb..298afcf3bf2a 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -338,8 +338,6 @@ _GLOBAL(__tm_recheckpoint) */ subi r7, r7, STACK_FRAME_OVERHEAD - SET_SCRATCH0(r1) - mfmsr r6 /* R4 = original MSR to indicate whether thread used FP/Vector etc. */ @@ -468,6 +466,7 @@ restore_gprs: * until we turn MSR RI back on. */ + SET_SCRATCH0(r1) ld r5, -8(r1) ld r1, -16(r1) -- cgit From bfd1b7ae5e0f6aa3f31d590936d580c6db099bab Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:43 +0530 Subject: powerpc/powernv: Use PNV_THREAD_WINKLE macro while requesting for winkle Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_power7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 470ceebd2d23..705c867306ea 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -252,7 +252,7 @@ _GLOBAL(power7_sleep) /* No return */ _GLOBAL(power7_winkle) - li r3,3 + li r3,PNV_THREAD_WINKLE li r4,1 b power7_powersave_common /* No return */ -- cgit From 1706567117ba93cfa27f6fcc0846b1606e039cc5 Mon Sep 17 00:00:00 2001 From: "Shreyas B.
Prabhu" Date: Fri, 8 Jul 2016 11:50:44 +0530 Subject: powerpc/kvm: make hypervisor state restore a function In the current code, when the thread wakes up in reset vector, some of the state restore code and check for whether a thread needs to branch to kvm is duplicated. Reorder the code such that this duplication is avoided. At a higher level this is what the change looks like- Before this patch - power7_wakeup_tb_loss: restore hypervisor state if (thread needed by kvm) goto kvm_start_guest restore nvgprs, cr, pc rfid to process context power7_wakeup_loss: restore nvgprs, cr, pc rfid to process context reset vector: if (waking from deep idle states) goto power7_wakeup_tb_loss else if (thread needed by kvm) goto kvm_start_guest goto power7_wakeup_loss After this patch - power7_wakeup_tb_loss: restore hypervisor state return power7_restore_hyp_resource(): if (waking from deep idle states) goto power7_wakeup_tb_loss return power7_wakeup_loss: restore nvgprs, cr, pc rfid to process context reset vector: power7_restore_hyp_resource() if (thread needed by kvm) goto kvm_start_guest goto power7_wakeup_loss Reviewed-by: Paul Mackerras Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 28 ++------------ arch/powerpc/kernel/idle_power7.S | 72 +++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 54 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8bcc1b457115..612a65b2b99e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -107,25 +107,9 @@ BEGIN_FTR_SECTION beq 9f cmpwi cr3,r13,2 - - /* - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. - */ GET_PACA(r13) - clrldi r5,r13,63 - clrrdi r13,r13,1 - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 + bl power7_restore_hyp_resource - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,8f /* Either sleep or Winkle */ - - /* Waking up from nap should not cause hypervisor state loss */ - bgt cr3,. - - /* Waking up from nap */ li r0,PNV_THREAD_RUNNING stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ @@ -143,13 +127,9 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 - beq cr3,2f - b power7_wakeup_noloss -2: b power7_wakeup_loss - - /* Fast Sleep wakeup on PowerNV */ -8: GET_PACA(r13) - b power7_wakeup_tb_loss + blt cr3,2f + b power7_wakeup_loss +2: b power7_wakeup_noloss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 705c867306ea..d5def062a544 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -276,6 +276,39 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ 20: nop; +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(power7_restore_hyp_resource) + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. 
+ */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. + + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ + + _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) @@ -284,11 +317,13 @@ _GLOBAL(power7_wakeup_tb_loss) * and they are restored before switching to the process context. Hence * until they are restored, they are free to be used. * - * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode - * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the - * wakeup reason if we branch to kvm_start_guest. + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. */ - + mflr r17 mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -438,33 +473,10 @@ common_exit: hypervisor_state_restored: - li r5,PNV_THREAD_RUNNING - stb r5,PACA_THREAD_IDLE_STATE(r13) - mtspr SPRN_SRR1,r16 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 6f - b kvm_start_guest -6: -#endif - - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r3,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r3 - mfspr r3,SPRN_SRR1 /* Return SRR1 */ - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid + mtlr r17 + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ fastsleep_workaround_at_exit: li r3,1 -- cgit From 83289f909a72596d4902be3b3e1dffe48e6074af Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:45 +0530 Subject: powerpc/powernv: Rename idle_power7.S to idle_book3s.S idle_power7.S handles idle entry/exit for POWER7, POWER8 and in next patch for POWER9. Rename the file to a non-hardware specific name. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. 
Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/idle_book3s.S | 527 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/idle_power7.S | 527 -------------------------------------- 3 files changed, 528 insertions(+), 528 deletions(-) create mode 100644 arch/powerpc/kernel/idle_book3s.S delete mode 100644 arch/powerpc/kernel/idle_power7.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2da380fcc34c..9e7bfc322368 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o -obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o +obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S new file mode 100644 index 000000000000..d5def062a544 --- /dev/null +++ b/arch/powerpc/kernel/idle_book3s.S @@ -0,0 +1,527 @@ +/* + * This file contains the power_save function for Power7 CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +/* + * Use unused space in the interrupt stack to save and restore + * registers for winkle support. + */ +#define _SDR1 GPR3 +#define _RPR GPR4 +#define _SPURR GPR5 +#define _PURR GPR6 +#define _TSCR GPR7 +#define _DSCR GPR8 +#define _AMOR GPR9 +#define _WORT GPR10 +#define _WORC GPR11 + +/* Idle state entry routines */ + +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . + + .text + +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + +/* + * Pass requested state in r3: + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE + * + * To check IRQ_HAPPENED in r4 + * 0 - don't check + * 1 - check + */ +_GLOBAL(power7_powersave_common) + /* Use r3 to pass state nap/sleep/winkle */ + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + andi. 
r0,r0,~PACA_IRQ_HARD_DIS@l + beq 1f + cmpwi cr0,r4,0 + beq 1f + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) + li r3,0 /* Return 0 (no nap) */ + mtlr r0 + blr + +1: /* We mark irqs hard disabled as this is the state we'll + * be in when returning and we need to tell arch_local_irq_restore() + * about it + */ + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* We haven't lost state ... yet */ + li r0,0 + stb r0,PACA_NAPSTATELOST(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r4 + std r4,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + + .globl power7_enter_nap_mode +power7_enter_nap_mode: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're napping */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + stb r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr3,r3,PNV_THREAD_SLEEP + bge cr3,2f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +2: + /* Sleep or winkle */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop1: + lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + + andc r15,r15,r7 /* Clear thread bit */ + + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + +/* + * If cr0 = 0, then current thread is the last thread of the core entering + * sleep. Last thread needs to execute the hardware bug workaround code if + * required by the platform. + * Make the workaround call unconditionally here. The below branch call is + * patched out when the idle states are discovered if the platform does not + * require it. + */ +.global pnv_fastsleep_workaround_at_entry +pnv_fastsleep_workaround_at_entry: + beq fastsleep_workaround_at_entry + + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + +common_enter: /* common code for all the threads entering sleep or winkle */ + bgt cr3,enter_winkle + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + +fastsleep_workaround_at_entry: + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. 
r15,0,r14 + bne- lwarx_loop1 + isync + + /* Fast sleep workaround */ + li r3,1 + li r4,1 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + + /* Clear Lock bit */ + li r0,0 + lwsync + stw r0,0(r14) + b common_enter + +enter_winkle: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + li r3, 1 + /* fall through */ + +_GLOBAL(power7_nap) + mr r4,r3 + li r3,PNV_THREAD_NAP + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_sleep) + li r3,PNV_THREAD_SLEEP + li r4,1 + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_winkle) + li r3,PNV_THREAD_WINKLE + li r4,1 + b power7_powersave_common + /* No return */ + +#define CHECK_HMI_INTERRUPT \ + mfspr r0,SPRN_SRR1; \ +BEGIN_FTR_SECTION_NESTED(66); \ + rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ +FTR_SECTION_ELSE_NESTED(66); \ + rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ +ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ + cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ + bne 20f; \ + /* Invoke opal call to handle hmi */ \ + ld r2,PACATOC(r13); \ + ld r1,PACAR1(r13); \ + std r3,ORIG_GPR3(r1); /* Save original r3 */ \ + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + bl opal_call_realmode; \ + ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ +20: nop; + + +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(power7_restore_hyp_resource) + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. + */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. + + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ + + +_GLOBAL(power7_wakeup_tb_loss) + ld r2,PACATOC(r13); + ld r1,PACAR1(r13) + /* + * Before entering any idle state, the NVGPRs are saved in the stack + * and they are restored before switching to the process context. Hence + * until they are restored, they are free to be used. + * + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. 
+ */ + mflr r17 + mfspr r16,SPRN_SRR1 +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop2: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + /* + * Lock bit is set in one of the 2 cases- + * a. In the sleep/winkle enter path, the last thread is executing + * fastsleep workaround code. + * b. In the wake up path, another thread is executing fastsleep + * workaround undo code or resyncing timebase or restoring context + * In either case loop until the lock bit is cleared. + */ + bnel core_idle_lock_held + + cmpwi cr2,r15,0 + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi cr1,r4,0 /* Check if first in subcore */ + + /* + * At this stage + * cr1 - 0b0100 if first thread to wakeup in subcore + * cr2 - 0b0100 if first thread to wakeup in core + * cr3- 0b0010 if waking up from sleep or winkle + * cr4 - 0b0100 if waking up from winkle + */ + + or r15,r15,r7 /* Set thread bit */ + + beq cr1,first_thread_in_subcore + + /* Not first thread in subcore to wake up */ + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + b common_exit + +first_thread_in_subcore: + /* First thread in subcore to wakeup */ + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + + /* + * If waking up from sleep, subcore state is not lost. Hence + * skip subcore state restore + */ + bne cr4,subcore_state_restored + + /* Restore per-subcore state */ + ld r4,_SDR1(r1) + mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 + +subcore_state_restored: + /* + * Check if the thread is also the first thread in the core. If not, + * skip to clear_lock. + */ + bne cr2,clear_lock + +first_thread_in_core: + + /* + * First thread in the core waking up from fastsleep. It needs to + * call the fastsleep workaround code if the platform requires it. + * Call it unconditionally here. The below branch instruction will + * be patched out when the idle states are discovered if platform + * does not require workaround. + */ +.global pnv_fastsleep_workaround_at_exit +pnv_fastsleep_workaround_at_exit: + b fastsleep_workaround_at_exit + +timebase_resync: + /* Do timebase resync if we are waking up from sleep. Use cr3 value + * set in exceptions-64s.S */ + ble cr3,clear_lock + /* Time base re-sync */ + li r0,OPAL_RESYNC_TIMEBASE + bl opal_call_realmode; + /* TODO: Check r3 for failure */ + + /* + * If waking up from sleep, per core state is not lost, skip to + * clear_lock. + */ + bne cr4,clear_lock + + /* Restore per core state */ + ld r4,_TSCR(r1) + mtspr SPRN_TSCR,r4 + ld r4,_WORC(r1) + mtspr SPRN_WORC,r4 + +clear_lock: + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + lwsync + stw r15,0(r14) + +common_exit: + /* + * Common to all threads. + * + * If waking up from sleep, hypervisor state is not lost. Hence + * skip hypervisor state restore. + */ + bne cr4,hypervisor_state_restored + + /* Waking up from winkle */ + + /* Restore per thread state */ + bl __restore_cpu_power8 + + /* Restore SLB from PACA */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 + andis. 
r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + ld r4,_SPURR(r1) + mtspr SPRN_SPURR,r4 + ld r4,_PURR(r1) + mtspr SPRN_PURR,r4 + ld r4,_DSCR(r1) + mtspr SPRN_DSCR,r4 + ld r4,_WORT(r1) + mtspr SPRN_WORT,r4 + +hypervisor_state_restored: + + mtspr SPRN_SRR1,r16 + mtlr r17 + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ + +fastsleep_workaround_at_exit: + li r3,1 + li r4,0 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + b timebase_resync + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ +_GLOBAL(power7_wakeup_loss) + ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r6,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r6 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ +_GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne power7_wakeup_loss +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + ld r1,PACAR1(r13) + ld r6,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r6 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S deleted file mode 100644 index d5def062a544..000000000000 --- a/arch/powerpc/kernel/idle_power7.S +++ /dev/null @@ -1,527 +0,0 @@ -/* - * This file contains the power_save function for Power7 CPUs. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - -/* - * Use unused space in the interrupt stack to save and restore - * registers for winkle support. - */ -#define _SDR1 GPR3 -#define _RPR GPR4 -#define _SPURR GPR5 -#define _PURR GPR6 -#define _TSCR GPR7 -#define _DSCR GPR8 -#define _AMOR GPR9 -#define _WORT GPR10 -#define _WORC GPR11 - -/* Idle state entry routines */ - -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ - bne 1b; \ - IDLE_INST; \ - b . - - .text - -/* - * Used by threads when the lock bit of core_idle_state is set. - * Threads will spin in HMT_LOW until the lock bit is cleared. - * r14 - pointer to core_idle_state - * r15 - used to load contents of core_idle_state - */ - -core_idle_lock_held: - HMT_LOW -3: lwz r15,0(r14) - andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT - bne 3b - HMT_MEDIUM - lwarx r15,0,r14 - blr - -/* - * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE - * - * To check IRQ_HAPPENED in r4 - * 0 - don't check - * 1 - check - */ -_GLOBAL(power7_powersave_common) - /* Use r3 to pass state nap/sleep/winkle */ - /* NAP is a state loss, we create a regs frame on the - * stack, fill it up with the state we care about and - * stick a pointer to it in PACAR1. We really only - * need to save PC, some CR bits and the NV GPRs, - * but for now an interrupt frame will do. 
- */ - mflr r0 - std r0,16(r1) - stdu r1,-INT_FRAME_SIZE(r1) - std r0,_LINK(r1) - std r0,_NIP(r1) - - /* Hard disable interrupts */ - mfmsr r9 - rldicl r9,r9,48,1 - rotldi r9,r9,16 - mtmsrd r9,1 /* hard-disable interrupts */ - - /* Check if something happened while soft-disabled */ - lbz r0,PACAIRQHAPPENED(r13) - andi. r0,r0,~PACA_IRQ_HARD_DIS@l - beq 1f - cmpwi cr0,r4,0 - beq 1f - addi r1,r1,INT_FRAME_SIZE - ld r0,16(r1) - li r3,0 /* Return 0 (no nap) */ - mtlr r0 - blr - -1: /* We mark irqs hard disabled as this is the state we'll - * be in when returning and we need to tell arch_local_irq_restore() - * about it - */ - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) - - /* We haven't lost state ... yet */ - li r0,0 - stb r0,PACA_NAPSTATELOST(r13) - - /* Continue saving state */ - SAVE_GPR(2, r1) - SAVE_NVGPRS(r1) - mfcr r4 - std r4,_CCR(r1) - std r9,_MSR(r1) - std r1,PACAR1(r13) - - /* - * Go to real mode to do the nap, as required by the architecture. - * Also, we need to be in real mode before setting hwthread_state, - * because as soon as we do that, another thread can switch - * the MMU context to the guest. - */ - LOAD_REG_IMMEDIATE(r5, MSR_IDLE) - li r6, MSR_RI - andc r6, r9, r6 - LOAD_REG_ADDR(r7, power7_enter_nap_mode) - mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r5 - rfid - - .globl power7_enter_nap_mode -power7_enter_nap_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're napping */ - li r4,KVM_HWTHREAD_IN_NAP - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - stb r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr3,r3,PNV_THREAD_SLEEP - bge cr3,2f - IDLE_STATE_ENTER_SEQ(PPC_NAP) - /* No return */ -2: - /* Sleep or winkle */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) -lwarx_loop1: - lwarx r15,0,r14 - - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bnel core_idle_lock_held - - andc r15,r15,r7 /* Clear thread bit */ - - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS - -/* - * If cr0 = 0, then current thread is the last thread of the core entering - * sleep. Last thread needs to execute the hardware bug workaround code if - * required by the platform. - * Make the workaround call unconditionally here. The below branch call is - * patched out when the idle states are discovered if the platform does not - * require it. - */ -.global pnv_fastsleep_workaround_at_entry -pnv_fastsleep_workaround_at_entry: - beq fastsleep_workaround_at_entry - - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - -common_enter: /* common code for all the threads entering sleep or winkle */ - bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) - -fastsleep_workaround_at_entry: - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT - stwcx. 
r15,0,r14 - bne- lwarx_loop1 - isync - - /* Fast sleep workaround */ - li r3,1 - li r4,1 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode - - /* Clear Lock bit */ - li r0,0 - lwsync - stw r0,0(r14) - b common_enter - -enter_winkle: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) - -_GLOBAL(power7_idle) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDRBASE(r3,powersave_nap) - lwz r4,ADDROFF(powersave_nap)(r3) - cmpwi 0,r4,0 - beqlr - li r3, 1 - /* fall through */ - -_GLOBAL(power7_nap) - mr r4,r3 - li r3,PNV_THREAD_NAP - b power7_powersave_common - /* No return */ - -_GLOBAL(power7_sleep) - li r3,PNV_THREAD_SLEEP - li r4,1 - b power7_powersave_common - /* No return */ - -_GLOBAL(power7_winkle) - li r3,PNV_THREAD_WINKLE - li r4,1 - b power7_powersave_common - /* No return */ - -#define CHECK_HMI_INTERRUPT \ - mfspr r0,SPRN_SRR1; \ -BEGIN_FTR_SECTION_NESTED(66); \ - rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ -FTR_SECTION_ELSE_NESTED(66); \ - rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ - cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ - bne 20f; \ - /* Invoke opal call to handle hmi */ \ - ld r2,PACATOC(r13); \ - ld r1,PACAR1(r13); \ - std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ - bl opal_call_realmode; \ - ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ -20: nop; - - -/* - * Called from reset vector. Check whether we have woken up with - * hypervisor state loss. If yes, restore hypervisor state and return - * back to reset vector. - * - * r13 - Contents of HSPRG0 - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - */ -_GLOBAL(power7_restore_hyp_resource) - /* - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. - */ - clrldi r5,r13,63 - clrrdi r13,r13,1 - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 - - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ - - /* - * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking - * up from nap. At this stage CR3 shouldn't contains 'gt' since that - * indicates we are waking with hypervisor state loss from nap. - */ - bgt cr3,. - - blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ - - -_GLOBAL(power7_wakeup_tb_loss) - ld r2,PACATOC(r13); - ld r1,PACAR1(r13) - /* - * Before entering any idle state, the NVGPRs are saved in the stack - * and they are restored before switching to the process context. Hence - * until they are restored, they are free to be used. - * - * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required - * to determine the wakeup reason if we branch to kvm_start_guest. LR - * is required to return back to reset vector after hypervisor state - * restore is complete. 
- */ - mflr r17 - mfspr r16,SPRN_SRR1 -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) -lwarx_loop2: - lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - /* - * Lock bit is set in one of the 2 cases- - * a. In the sleep/winkle enter path, the last thread is executing - * fastsleep workaround code. - * b. In the wake up path, another thread is executing fastsleep - * workaround undo code or resyncing timebase or restoring context - * In either case loop until the lock bit is cleared. - */ - bnel core_idle_lock_held - - cmpwi cr2,r15,0 - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi cr1,r4,0 /* Check if first in subcore */ - - /* - * At this stage - * cr1 - 0b0100 if first thread to wakeup in subcore - * cr2 - 0b0100 if first thread to wakeup in core - * cr3- 0b0010 if waking up from sleep or winkle - * cr4 - 0b0100 if waking up from winkle - */ - - or r15,r15,r7 /* Set thread bit */ - - beq cr1,first_thread_in_subcore - - /* Not first thread in subcore to wake up */ - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - b common_exit - -first_thread_in_subcore: - /* First thread in subcore to wakeup */ - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - - /* - * If waking up from sleep, subcore state is not lost. Hence - * skip subcore state restore - */ - bne cr4,subcore_state_restored - - /* Restore per-subcore state */ - ld r4,_SDR1(r1) - mtspr SPRN_SDR1,r4 - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 - -subcore_state_restored: - /* - * Check if the thread is also the first thread in the core. If not, - * skip to clear_lock. - */ - bne cr2,clear_lock - -first_thread_in_core: - - /* - * First thread in the core waking up from fastsleep. It needs to - * call the fastsleep workaround code if the platform requires it. - * Call it unconditionally here. The below branch instruction will - * be patched out when the idle states are discovered if platform - * does not require workaround. - */ -.global pnv_fastsleep_workaround_at_exit -pnv_fastsleep_workaround_at_exit: - b fastsleep_workaround_at_exit - -timebase_resync: - /* Do timebase resync if we are waking up from sleep. Use cr3 value - * set in exceptions-64s.S */ - ble cr3,clear_lock - /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - bl opal_call_realmode; - /* TODO: Check r3 for failure */ - - /* - * If waking up from sleep, per core state is not lost, skip to - * clear_lock. - */ - bne cr4,clear_lock - - /* Restore per core state */ - ld r4,_TSCR(r1) - mtspr SPRN_TSCR,r4 - ld r4,_WORC(r1) - mtspr SPRN_WORC,r4 - -clear_lock: - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS - lwsync - stw r15,0(r14) - -common_exit: - /* - * Common to all threads. - * - * If waking up from sleep, hypervisor state is not lost. Hence - * skip hypervisor state restore. - */ - bne cr4,hypervisor_state_restored - - /* Waking up from winkle */ - - /* Restore per thread state */ - bl __restore_cpu_power8 - - /* Restore SLB from PACA */ - ld r8,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - li r3, SLBSHADOW_SAVEAREA - LDX_BE r5, r8, r3 - addi r3, r3, 8 - LDX_BE r6, r8, r3 - andis. 
r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r8,r8,16 - .endr - - ld r4,_SPURR(r1) - mtspr SPRN_SPURR,r4 - ld r4,_PURR(r1) - mtspr SPRN_PURR,r4 - ld r4,_DSCR(r1) - mtspr SPRN_DSCR,r4 - ld r4,_WORT(r1) - mtspr SPRN_WORT,r4 - -hypervisor_state_restored: - - mtspr SPRN_SRR1,r16 - mtlr r17 - blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ - -fastsleep_workaround_at_exit: - li r3,1 - li r4,0 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode - b timebase_resync - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - */ -_GLOBAL(power7_wakeup_loss) - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r6,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r6 - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - */ -_GLOBAL(power7_wakeup_noloss) - lbz r0,PACA_NAPSTATELOST(r13) - cmpwi r0,0 - bne power7_wakeup_loss -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ld r1,PACAR1(r13) - ld r6,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r6 - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid -- cgit From 5fa6b6bd7adf347f2989560e7a3b7f806be0187f Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:46 +0530 Subject: powerpc/powernv: Rename reusable idle functions to hardware agnostic names Functions like power7_wakeup_loss, power7_wakeup_noloss, power7_wakeup_tb_loss are used by POWER7 and POWER8 hardware. They can also be used by POWER9. Hence rename these functions hardware agnostic names. Suggested-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 8 ++++---- arch/powerpc/kernel/idle_book3s.S | 33 +++++++++++++++++---------------- 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 612a65b2b99e..5c009c5c895d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -108,7 +108,7 @@ BEGIN_FTR_SECTION cmpwi cr3,r13,2 GET_PACA(r13) - bl power7_restore_hyp_resource + bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ @@ -128,8 +128,8 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 blt cr3,2f - b power7_wakeup_loss -2: b power7_wakeup_noloss + b pnv_wakeup_loss +2: b pnv_wakeup_noloss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) @@ -1269,7 +1269,7 @@ machine_check_handle_early: GET_PACA(r13) ld r1,PACAR1(r13) li r3,PNV_THREAD_NAP - b power7_enter_nap_mode + b pnv_enter_arch207_idle_mode 4: #endif /* diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d5def062a544..34dbfc925707 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,5 +1,6 @@ /* - * This file contains the power_save function for Power7 CPUs. + * This file contains idle entry/exit functions for POWER7 and + * POWER8 CPUs. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -75,7 +76,7 @@ core_idle_lock_held: * 0 - don't check * 1 - check */ -_GLOBAL(power7_powersave_common) +_GLOBAL(pnv_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and @@ -135,14 +136,14 @@ _GLOBAL(power7_powersave_common) LOAD_REG_IMMEDIATE(r5, MSR_IDLE) li r6, MSR_RI andc r6, r9, r6 - LOAD_REG_ADDR(r7, power7_enter_nap_mode) + LOAD_REG_ADDR(r7, pnv_enter_arch207_idle_mode) mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r5 rfid - .globl power7_enter_nap_mode -power7_enter_nap_mode: + .globl pnv_enter_arch207_idle_mode +pnv_enter_arch207_idle_mode: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP @@ -242,19 +243,19 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 li r3,PNV_THREAD_NAP - b power7_powersave_common + b pnv_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,PNV_THREAD_SLEEP li r4,1 - b power7_powersave_common + b pnv_powersave_common /* No return */ _GLOBAL(power7_winkle) li r3,PNV_THREAD_WINKLE li r4,1 - b power7_powersave_common + b pnv_powersave_common /* No return */ #define CHECK_HMI_INTERRUPT \ @@ -284,7 +285,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ * r13 - Contents of HSPRG0 * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ -_GLOBAL(power7_restore_hyp_resource) +_GLOBAL(pnv_restore_hyp_resource) /* * Check if last bit of HSPGR0 is set. This indicates whether we are * waking up from winkle. @@ -296,7 +297,7 @@ _GLOBAL(power7_restore_hyp_resource) lbz r0,PACA_THREAD_IDLE_STATE(r13) cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ /* * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking @@ -306,10 +307,10 @@ _GLOBAL(power7_restore_hyp_resource) bgt cr3,. blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ + pnv_restore_hyp_resource was invoked */ -_GLOBAL(power7_wakeup_tb_loss) +_GLOBAL(pnv_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) /* @@ -476,7 +477,7 @@ hypervisor_state_restored: mtspr SPRN_SRR1,r16 mtlr r17 blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ + pnv_restore_hyp_resource was invoked */ fastsleep_workaround_at_exit: li r3,1 @@ -489,7 +490,7 @@ fastsleep_workaround_at_exit: * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_loss) +_GLOBAL(pnv_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -509,10 +510,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_noloss) +_GLOBAL(pnv_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 - bne power7_wakeup_loss + bne pnv_wakeup_loss BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) -- cgit From 4eae2c9ae54a5ef3ca32370e3b28a6f83c61401f Mon Sep 17 00:00:00 2001 From: "Shreyas B. 
Prabhu" Date: Fri, 8 Jul 2016 11:50:47 +0530 Subject: powerpc/powernv: Make pnv_powersave_common more generic pnv_powersave_common does common steps needed before entering idle state and eventually changes MSR to MSR_IDLE and does rfid to pnv_enter_arch207_idle_mode. Move the updation of HSTATE_HWTHREAD_STATE to pnv_powersave_common from pnv_enter_arch207_idle_mode and make it more generic by passing the rfid address as a function parameter. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 34dbfc925707..a8397e398817 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -75,6 +75,8 @@ core_idle_lock_held: * To check IRQ_HAPPENED in r4 * 0 - don't check * 1 - check + * + * Address to 'rfid' to in r5 */ _GLOBAL(pnv_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ @@ -127,28 +129,28 @@ _GLOBAL(pnv_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, * because as soon as we do that, another thread can switch * the MMU context to the guest. */ - LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + LOAD_REG_IMMEDIATE(r7, MSR_IDLE) li r6, MSR_RI andc r6, r9, r6 - LOAD_REG_ADDR(r7, pnv_enter_arch207_idle_mode) mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r5 + mtspr SPRN_SRR0, r5 + mtspr SPRN_SRR1, r7 rfid .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're napping */ - li r4,KVM_HWTHREAD_IN_NAP - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -243,18 +245,21 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 li r3,PNV_THREAD_NAP + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) b pnv_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,PNV_THREAD_SLEEP li r4,1 + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) b pnv_powersave_common /* No return */ _GLOBAL(power7_winkle) li r3,PNV_THREAD_WINKLE li r4,1 + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) b pnv_powersave_common /* No return */ -- cgit From 0dfffb48cecd8f84c6e649baee9bacd9be925734 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:48 +0530 Subject: powerpc/powernv: abstraction for saving SPRs before entering deep idle states Create a function for saving SPRs before entering deep idle states. This function can be reused for POWER9 deep idle states. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 54 +++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index a8397e398817..2f909a12c76c 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -52,6 +52,36 @@ .text +/* + * Used by threads before entering deep idle states. 
Saves SPRs + * in interrupt stack frame + */ +save_sprs_to_stack: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + + blr + /* * Used by threads when the lock bit of core_idle_state is set. * Threads will spin in HMT_LOW until the lock bit is cleared. @@ -209,28 +239,8 @@ fastsleep_workaround_at_entry: b common_enter enter_winkle: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) + bl save_sprs_to_stack + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) _GLOBAL(power7_idle) -- cgit From bcef83a00dc44ee25ff4d6e078cf6432ddf74dec Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:49 +0530 Subject: powerpc/powernv: Add platform support for stop instruction POWER ISA v3 defines a new idle processor core mechanism. In summary, a) new instruction named stop is added. This instruction replaces instructions like nap, sleep, rvwinkle. b) new per thread SPR named Processor Stop Status and Control Register (PSSCR) is added which controls the behavior of stop instruction. PSSCR layout: ---------------------------------------------------------- | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | ---------------------------------------------------------- 0 4 41 42 43 44 48 54 56 60 PSSCR key fields: Bits 0:3 - Power-Saving Level Status. This field indicates the lowest power-saving state the thread entered since stop instruction was last executed. Bit 42 - Enable State Loss 0 - No state is lost irrespective of other fields 1 - Allows state loss Bits 44:47 - Power-Saving Level Limit This limits the power-saving level that can be entered into. Bits 60:63 - Requested Level Used to specify which power-saving level must be entered on executing stop instruction This patch adds support for stop instruction and PSSCR handling. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 193 +++++++++++++++++++++++++++++++------- 1 file changed, 159 insertions(+), 34 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 2f909a12c76c..1f564eb409c3 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,6 +1,6 @@ /* - * This file contains idle entry/exit functions for POWER7 and - * POWER8 CPUs. + * This file contains idle entry/exit functions for POWER7, + * POWER8 and POWER9 CPUs. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,6 +21,7 @@ #include #include #include +#include #undef DEBUG @@ -37,6 +38,11 @@ #define _AMOR GPR9 #define _WORT GPR10 #define _WORC GPR11 +#define _PTCR GPR12 + +#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK /* Idle state entry routines */ @@ -61,8 +67,17 @@ save_sprs_to_stack: * Note all register i.e per-core, per-subcore or per-thread is saved * here since any thread in the core might wake up first */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_PTCR + std r3,_PTCR(r1) + /* + * Note - SDR1 is dropped in Power ISA v3. Hence not restoring + * SDR1 here + */ +FTR_SECTION_ELSE mfspr r3,SPRN_SDR1 std r3,_SDR1(r1) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mfspr r3,SPRN_RPR std r3,_RPR(r1) mfspr r3,SPRN_SPURR @@ -100,7 +115,8 @@ core_idle_lock_held: /* * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 + * - Requested STOP state in POWER9 * * To check IRQ_HAPPENED in r4 * 0 - don't check @@ -161,7 +177,7 @@ _GLOBAL(pnv_powersave_common) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_NAP + li r4,KVM_HWTHREAD_IN_IDLE stb r4,HSTATE_HWTHREAD_STATE(r13) #endif @@ -243,6 +259,41 @@ enter_winkle: IDLE_STATE_ENTER_SEQ(PPC_WINKLE) +/* + * r3 - requested stop state + */ +power_enter_stop: +/* + * Check if the requested state is a deep idle state. + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + cmpd r3,r4 + bge 2f + IDLE_STATE_ENTER_SEQ(PPC_STOP) +2: +/* + * Entering deep idle state. + * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to + * stack and enter stop + */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + +lwarx_loop_stop: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ + + stwcx. r15,0,r14 + bne- lwarx_loop_stop + isync + + bl save_sprs_to_stack + + IDLE_STATE_ENTER_SEQ(PPC_STOP) + _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ LOAD_REG_ADDRBASE(r3,powersave_nap) @@ -292,6 +343,17 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ 20: nop; +/* + * r3 - requested stop state + */ +_GLOBAL(power9_idle_stop) + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) + or r4,r4,r3 + mtspr SPRN_PSSCR, r4 + li r4, 1 + LOAD_REG_ADDR(r5,power_enter_stop) + b pnv_powersave_common + /* No return */ /* * Called from reset vector. Check whether we have woken up with * hypervisor state loss. If yes, restore hypervisor state and return @@ -301,7 +363,33 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ _GLOBAL(pnv_restore_hyp_resource) + ld r2,PACATOC(r13); +BEGIN_FTR_SECTION + /* + * POWER ISA 3. Use PSSCR to determine if we + * are waking up from deep idle state + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + + mfspr r5,SPRN_PSSCR /* + * 0-3 bits correspond to Power-Saving Level Status + * which indicates the idle state we are waking up from + */ + rldicl r5,r5,4,60 + cmpd cr4,r5,r4 + bge cr4,pnv_wakeup_tb_loss + /* + * Waking up without hypervisor state loss. Return to + * reset vector + */ + blr + +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* + * POWER ISA 2.07 or less. 
* Check if last bit of HSPGR0 is set. This indicates whether we are * waking up from winkle. */ @@ -324,9 +412,17 @@ _GLOBAL(pnv_restore_hyp_resource) blr /* Return back to System Reset vector from where pnv_restore_hyp_resource was invoked */ - +/* + * Called if waking up from idle state which can cause either partial or + * complete hyp state loss. + * In POWER8, called if waking up from fastsleep or winkle + * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state + * + * r13 - PACA + * cr3 - gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. + */ _GLOBAL(pnv_wakeup_tb_loss) - ld r2,PACATOC(r13); ld r1,PACAR1(r13) /* * Before entering any idle state, the NVGPRs are saved in the stack @@ -361,35 +457,35 @@ lwarx_loop2: bnel core_idle_lock_held cmpwi cr2,r15,0 - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi cr1,r4,0 /* Check if first in subcore */ /* * At this stage - * cr1 - 0b0100 if first thread to wakeup in subcore - * cr2 - 0b0100 if first thread to wakeup in core - * cr3- 0b0010 if waking up from sleep or winkle - * cr4 - 0b0100 if waking up from winkle + * cr2 - eq if first thread to wakeup in core + * cr3- gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. */ - or r15,r15,r7 /* Set thread bit */ - - beq cr1,first_thread_in_subcore - - /* Not first thread in subcore to wake up */ - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - b common_exit - -first_thread_in_subcore: - /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 bne- lwarx_loop2 isync +BEGIN_FTR_SECTION + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi r4,0 /* Check if first in subcore */ + + or r15,r15,r7 /* Set thread bit */ + beq first_thread_in_subcore +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + + or r15,r15,r7 /* Set thread bit */ + beq cr2,first_thread_in_core + + /* Not first thread in core or subcore to wake up */ + b clear_lock + +first_thread_in_subcore: /* * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore @@ -399,6 +495,7 @@ first_thread_in_subcore: /* Restore per-subcore state */ ld r4,_SDR1(r1) mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) mtspr SPRN_RPR,r4 ld r4,_AMOR(r1) @@ -414,19 +511,23 @@ subcore_state_restored: first_thread_in_core: /* - * First thread in the core waking up from fastsleep. It needs to + * First thread in the core waking up from any state which can cause + * partial or complete hypervisor state loss. It needs to * call the fastsleep workaround code if the platform requires it. * Call it unconditionally here. The below branch instruction will - * be patched out when the idle states are discovered if platform - * does not require workaround. + * be patched out if the platform does not have fastsleep or does not + * require the workaround. Patching will be performed during the + * discovery of idle-states. */ .global pnv_fastsleep_workaround_at_exit pnv_fastsleep_workaround_at_exit: b fastsleep_workaround_at_exit timebase_resync: - /* Do timebase resync if we are waking up from sleep. Use cr3 value - * set in exceptions-64s.S */ + /* + * Use cr3 which indicates that we are waking up with atleast partial + * hypervisor state loss to determine if TIMEBASE RESYNC is needed. 
+ */ ble cr3,clear_lock /* Time base re-sync */ li r0,OPAL_RESYNC_TIMEBASE @@ -439,7 +540,18 @@ timebase_resync: */ bne cr4,clear_lock - /* Restore per core state */ + /* + * First thread in the core to wake up and its waking up with + * complete hypervisor state loss. Restore per core hypervisor + * state. + */ +BEGIN_FTR_SECTION + ld r4,_PTCR(r1) + mtspr SPRN_PTCR,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r4,_TSCR(r1) mtspr SPRN_TSCR,r4 ld r4,_WORC(r1) @@ -461,9 +573,9 @@ common_exit: /* Waking up from winkle */ - /* Restore per thread state */ - bl __restore_cpu_power8 - +BEGIN_MMU_FTR_SECTION + b no_segments +END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) /* Restore SLB from PACA */ ld r8,PACA_SLBSHADOWPTR(r13) @@ -477,6 +589,9 @@ common_exit: slbmte r6,r5 1: addi r8,r8,16 .endr +no_segments: + + /* Restore per thread state */ ld r4,_SPURR(r1) mtspr SPRN_SPURR,r4 @@ -487,6 +602,16 @@ common_exit: ld r4,_WORT(r1) mtspr SPRN_WORT,r4 + /* Call cur_cpu_spec->cpu_restore() */ + LOAD_REG_ADDR(r4, cur_cpu_spec) + ld r4,0(r4) + ld r12,CPU_SPEC_RESTORE(r4) +#ifdef PPC64_ELF_ABI_v1 + ld r12,0(r12) +#endif + mtctr r12 + bctrl + hypervisor_state_restored: mtspr SPRN_SRR1,r16 -- cgit From b88d4bce2b883e7f357ecf8f0cae070b9732f82b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 16 Jul 2016 17:58:25 -0500 Subject: powerpc/book64s: Move a few exception common handlers to make room This moves the CBE RAS and facility unavailable "common" handlers down to after the FWNMI page. This frees up some space in the very demanded spaces before the relocation-on vectors and before the FWNMI page. They are still within 64K of __start, so CONFIG_RELOCATABLE should still work. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5c009c5c895d..38340315a143 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -742,11 +742,6 @@ kvmppc_skip_Hinterrupt: #else STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception) #endif -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) - STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) - STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) -#endif /* CONFIG_CBE_RAS */ /* * Relocation-on interrupts: A subset of the interrupts can be delivered @@ -1111,9 +1106,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except - STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) - STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) - /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) @@ -1150,6 +1142,15 @@ fwnmi_data_area: . 
= 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) + STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) + STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) +#endif /* CONFIG_CBE_RAS */ + .globl hmi_exception_early hmi_exception_early: EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) -- cgit From 9baaef0a22c82a6c5d85b7535240e778c06e5b20 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:06 +1000 Subject: powerpc/irq: Add support for HV virtualization interrupts This will be delivering external interrupts from the XIVE to the Hypervisor. We treat it as a normal external interrupt for the lazy irq disable code (so it will be replayed as a 0x500) and route it to do_IRQ. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 2 ++ arch/powerpc/kernel/exceptions-64s.S | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index ec8a228df2f6..52ff3f025437 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -99,6 +99,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -118,6 +119,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 38340315a143..6200e4925d26 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -331,6 +331,12 @@ hv_doorbell_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_hv + . = 0xea0 +hv_virt_irq_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_virt_irq_hv + /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the * prolog code of the PerformanceMonitor one. A little @@ -581,6 +587,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) + MASKABLE_EXCEPTION_HV_OOL(0xea2, h_virt_irq) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xea2) + /* moved from 0xf00 */ STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) @@ -660,6 +669,8 @@ _GLOBAL(__replay_interrupt) BEGIN_FTR_SECTION cmpwi r3,0xe80 beq h_doorbell_common + cmpwi r3,0xea0 + beq h_virt_irq_common FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common @@ -734,6 +745,7 @@ kvmppc_skip_Hinterrupt: #else STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception) #endif + STD_EXCEPTION_COMMON_ASYNC(0xea0, h_virt_irq, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception) @@ -852,6 +864,12 @@ h_doorbell_relon_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_relon_hv + . 
= 0x4ea0 +h_virt_irq_relon_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_virt_irq_relon_hv + . = 0x4f00 performance_monitor_relon_pseries_trampoline: SET_SCRATCH0(r13) @@ -1109,6 +1127,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) + MASKABLE_RELON_EXCEPTION_HV_OOL(0xea0, h_virt_irq) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) -- cgit From 1d607bb3bd60f404d1ceb0d6ebceadf261068422 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:07 +1000 Subject: powerpc/irq: Add mechanism to force a replay of interrupts Calling this function with interrupts soft-disabled will cause a replay of the external interrupt vector when they are re-enabled. This will be used by the OPAL XICS backend (and later by the native XIVE code) to handle EOI signaling that there are more interrupts to fetch from the hardware since the hardware won't issue another HW interrupt in that case. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 58217aec30ea..ac910d9982df 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -342,6 +342,21 @@ bool prep_irq_for_idle(void) return true; } +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) -- cgit From 69c592ed40d32b4b680fd46c1b059cfe8abeb755 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:11 +1000 Subject: powerpc/opal: Add real mode call wrappers Replace the old generic opal_call_realmode() with proper per-call wrappers similar to the normal ones and convert callers. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 1f564eb409c3..335eb6cedae5 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -245,8 +245,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -337,8 +336,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ - bl opal_rm_handle_hmi; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; @@ -430,7 +428,7 @@ _GLOBAL(pnv_wakeup_tb_loss) * until they are restored, they are free to be used.
* * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required + * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required * to determine the wakeup reason if we branch to kvm_start_guest. LR * is required to return back to reset vector after hypervisor state * restore is complete. @@ -530,10 +528,7 @@ timebase_resync: */ ble cr3,clear_lock /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - bl opal_call_realmode; - /* TODO: Check r3 for failure */ - + bl opal_rm_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock. @@ -622,8 +617,7 @@ hypervisor_state_restored: fastsleep_workaround_at_exit: li r3,1 li r4,0 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state b timebase_resync /* -- cgit From 9a1a70ae1545c99ed685431c2ab6c4a2c58625de Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:18 +1000 Subject: powerpc/pci: Don't try to allocate resources that will be reassigned When we know we will reassign all resources, trying (and failing) to allocate them initially is fairly pointless and leads to a lot of scary messages in the kernel log Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c6ac4f01dd56..f93942b4b6a6 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1418,8 +1418,10 @@ void __init pcibios_resource_survey(void) /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + } /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the -- cgit From accfad7d0a85c5678eef76083972426032d64469 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:24 +0530 Subject: powerpc/mm: Clear top 16 bits of va only on older cpus As per ISA, we need to do this only for architecture version 2.02 and earlier. This continued to work even for 2.07. But let's not do this for anything after 2.02. ISA 3.0 requires these top bits to be not cleared. 
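As an illustration of how the new feature bit is meant to be consumed on the tlbie path, a minimal sketch (the helper name here is made up and is not part of the diff below):

	/* Crop the effective address used by tlbie on ISA <= 2.02 CPUs */
	static inline unsigned long tlbie_va(unsigned long va)
	{
		if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
			va &= ~(0xffffULL << 48);	/* clear the top 16 bits */
		return va;
	}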
Signed-off-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cputable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index eeeacf6235a3..d81f826d1029 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -137,7 +137,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -152,7 +152,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, -- cgit From b275bfb2696387be216db5b7372ee9dcf3f05b80 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:31 +0530 Subject: powerpc/mm/radix: Add a kernel command line to disable radix This patch adds the kernel command line disable_radix which disables the radix MMU mode even if firmware indicates radix support via the ibm,pa-features device tree node. This helps in testing different MMU modes easily. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 48434be99a07..7a01113d525c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -646,6 +646,14 @@ static void __init early_reserve_mem(void) #endif } +static bool disable_radix; +static int __init parse_disable_radix(char *p) +{ + disable_radix = true; + return 0; +} +early_param("disable_radix", parse_disable_radix); + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -735,6 +743,11 @@ void __init early_init_devtree(void *params) */ spinning_secondaries = boot_cpu_count - 1; #endif + /* + * now fixup radix MMU mode based on kernel command line + */ + if (disable_radix) + cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX; #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ -- cgit From 27d1149667352772240655b65372a4294f992ea7 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 13 Jul 2016 09:14:40 +0800 Subject: powerpc/32: Remove RELOCATABLE_PPC32 It is seldom used in the kernel code and can be easily replaced by either RELOCATABLE or PPC32. So there is no reason to keep a separate kernel option for this.
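The conversion at the remaining use sites follows one pattern; as an illustration of the intended replacement (a sketch, not a hunk from this patch), a guard such as

	#ifdef CONFIG_RELOCATABLE_PPC32

simply becomes

	#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC32)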
Signed-off-by: Kevin Hao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/Makefile | 3 +-- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9e7bfc322368..fe4c075bcf50 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o -obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o @@ -87,7 +86,7 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o +obj-$(CONFIG_RELOCATABLE) += reloc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2dd91f79de05..b5fba689fca6 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -165,7 +165,7 @@ SECTIONS . = ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_RELOCATABLE_PPC32 +#ifdef CONFIG_PPC32 __dynamic_symtab = .; #endif *(.dynsym) -- cgit From 9402c684613163888714df0955fa1f17142b08bf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:41 +1000 Subject: powerpc: Factor do_feature_fixup calls 32 and 64-bit do a similar set of calls early on, we move it all to a single common function to make the boot code more readable. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 17 ++--------------- arch/powerpc/kernel/setup_64.c | 13 +------------ 2 files changed, 3 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 4abefb525462..3f0aca2b7f63 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -70,7 +70,6 @@ int ucache_bsize; notrace unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); - struct cpu_spec *spec; /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ @@ -81,21 +80,9 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * Identify the CPU type and fix up code sections * that depend on which cpu we have. 
*/ - spec = identify_cpu(offset, mfspr(SPRN_PVR)); + identify_cpu(offset, mfspr(SPRN_PVR)); - do_feature_fixups(spec->cpu_features, - PTRRELOC(&__start___ftr_fixup), - PTRRELOC(&__stop___ftr_fixup)); - - do_feature_fixups(spec->mmu_features, - PTRRELOC(&__start___mmu_ftr_fixup), - PTRRELOC(&__stop___mmu_ftr_fixup)); - - do_lwsync_fixups(spec->cpu_features, - PTRRELOC(&__start___lwsync_fixup), - PTRRELOC(&__stop___lwsync_fixup)); - - do_final_fixups(); + apply_feature_fixups(); return KERNELBASE + offset; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 521846c904ca..373ef9d692f6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -467,18 +467,7 @@ void __init setup_system(void) { DBG(" -> setup_system()\n"); - /* Apply the CPUs-specific and firmware specific fixups to kernel - * text (nop out sections not relevant to this CPU or this firmware) - */ - do_feature_fixups(cur_cpu_spec->cpu_features, - &__start___ftr_fixup, &__stop___ftr_fixup); - do_feature_fixups(cur_cpu_spec->mmu_features, - &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup); - do_feature_fixups(powerpc_firmware_features, - &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); - do_lwsync_fixups(cur_cpu_spec->cpu_features, - &__start___lwsync_fixup, &__stop___lwsync_fixup); - do_final_fixups(); + apply_feature_fixups(); /* * Unflatten the device-tree passed by prom_init or kexec -- cgit From c4bd6cb87c9e28a7d9f4a97db5a06cc538eb5e48 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:42 +1000 Subject: powerpc: Move 64-bit feature fixup earlier Make it part of early_setup() as we really want the feature fixups to be applied before we turn on the MMU since they can have an impact on the various assembly path related to MMU management and interrupts. This makes 64-bit match what 32-bit does. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 373ef9d692f6..0a6d5f70cbd4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -285,6 +285,9 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* Apply all the dynamic patching */ + apply_feature_fixups(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to @@ -467,8 +470,6 @@ void __init setup_system(void) { DBG(" -> setup_system()\n"); - apply_feature_fixups(); - /* * Unflatten the device-tree passed by prom_init or kexec */ -- cgit From de4cf3de594f96f5a27f0e2346dd211beb126f88 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:43 +1000 Subject: powerpc: Move 64-bit memory reserves to setup_arch() There is really no need to do them that early, early_setup() runs before MMU is on, we should do the strict minimum there to get the MMU going. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 0a6d5f70cbd4..155dbcce8ef8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -295,16 +295,6 @@ void __init early_setup(unsigned long dt_ptr) */ cpu_ready_for_interrupts(); - /* Reserve large chunks of memory for use by CMA for KVM */ - kvm_cma_reserve(); - - /* - * Reserve any gigantic pages requested on the command line. - * memblock needs to have been initialized by the time this is - * called since this will reserve memory. - */ - reserve_hugetlb_gpages(); - DBG(" <- early_setup()\n"); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX @@ -687,6 +677,17 @@ void __init setup_arch(char **cmdline_p) dcache_bsize = ppc64_caches.dline_size; icache_bsize = ppc64_caches.iline_size; + + /* Reserve large chunks of memory for use by CMA for KVM */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + if (ppc_md.panic) setup_panic(); @@ -711,7 +712,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif - if (ppc_md.setup_arch) ppc_md.setup_arch(); -- cgit From 3808a88985b4f5f5e947c364debce4441a380fb8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:48 +1000 Subject: powerpc: Move FW feature probing out of pseries probe() We move the function itself to pseries/firmware.c and call it along with almost all other flat device-tree parsers from early_init_devtree() Signed-off-by: Benjamin Herrenschmidt [mpe: Move #ifdefs into the header by providing pseries_probe_fw_features()] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7a01113d525c..2bd1784e65b3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -57,6 +57,7 @@ #include #include #include +#include #include @@ -755,6 +756,9 @@ void __init early_init_devtree(void *params) #endif epapr_paravirt_early_init(); + /* Now try to figure out if we are running on LPAR and so on */ + pseries_probe_fw_features(); + DBG(" <- early_init_devtree()\n"); } -- cgit From d3cbff1b5a90afe6cb201aa2187c9609e21f92ad Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:49 +1000 Subject: powerpc: Put exception configuration in a common place The various calls to establish exception endianness and AIL are now done from a single point using already established CPU and FW feature bits to decide what to do. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 56 +++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 155dbcce8ef8..4ffd090633de 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -69,6 +69,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -205,23 +206,50 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } -static void cpu_ready_for_interrupts(void) +static void configure_exceptions(void) { - /* Set IR and DR in PACA MSR */ - get_paca()->kernel_msr = MSR_KERNEL; - /* - * Enable AIL if supported, and we are in hypervisor mode. If we are - * not in hypervisor mode, we enable relocation-on interrupts later - * in pSeries_setup_arch() using the H_SET_MODE hcall. + * Setup the trampolines from the lowmem exception vectors + * to the kdump kernel when not using a relocatable kernel. */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + setup_kdump_trampoline(); + + /* Under a PAPR hypervisor, we need hypercalls */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* Enable AIL if possible */ + pseries_enable_reloc_on_exc(); + + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. + * + * We don't call this for big endian as our calling convention + * makes us always enter in BE, and the call may fail under + * some circumstances with kdump. + */ +#ifdef __LITTLE_ENDIAN__ + pseries_little_endian_exceptions(); +#endif + } else { + /* Set endian mode using OPAL */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + opal_configure_cores(); + + /* Enable AIL if supported, and we are in hypervisor mode */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } } } +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -277,10 +305,10 @@ void __init early_setup(unsigned long dt_ptr) probe_machine(); /* - * Setup the trampolines from the lowmem exception vectors - * to the kdump kernel when not using a relocatable kernel. + * Configure exception handlers. This include setting up trampolines + * if needed, setting exception endian mode, etc... */ - setup_kdump_trampoline(); + configure_exceptions(); /* Initialize the hash table or TLB handling */ early_init_mmu(); -- cgit From 166dd7d3fbf2df183926f0e4b4855f6cbd8da945 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:51 +1000 Subject: powerpc/64: Move MMU backend selection out of platform code We move it into early_mmu_init() based on firmware features. For PS3, we have to move the setting of these into early_init_devtree(). 
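With the choice keyed off feature bits, the backend selection reduces to a feature test at MMU init time; a simplified sketch of the idea (not the literal code):

	static void __init early_init_mmu(void)
	{
		if (mmu_has_feature(MMU_FTR_RADIX))
			radix__early_init_mmu();
		else
			hash__early_init_mmu();
	}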
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2bd1784e65b3..bae3db791150 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -759,6 +759,12 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); +#ifdef CONFIG_PPC_PS3 + /* Identify PS3 firmware */ + if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; +#endif + DBG(" <- early_init_devtree()\n"); } -- cgit From 7025776ed1ebdfa1959932e7a4662c2f88607df0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:58 +1000 Subject: powerpc/mm: Move hash table ops to a separate structure Moving probe_machine() to after mmu init will cause the ppc_md fields relative to the hash table management to be overwritten. Since we have essentially disconnected the machine type from the hash backend ops, finish the job by moving them to a different structure. The only callback that didn't quite fit is update_partition_table since this is not specific to hash, so I moved it to a standalone variable for now. We can revisit later if needed. Signed-off-by: Benjamin Herrenschmidt [mpe: Fix ppc64e build failure in kexec] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec_64.c | 9 +++++++-- arch/powerpc/kernel/misc_64.S | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 50bf55135ef8..4c780a342282 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -55,7 +55,7 @@ int default_machine_kexec_prepare(struct kimage *image) const unsigned long *basep; const unsigned int *sizep; - if (!ppc_md.hpte_clear_all) + if (!mmu_hash_ops.hpte_clear_all) return -ENOENT; /* @@ -380,7 +380,12 @@ void default_machine_kexec(struct kimage *image) */ kexec_sequence(&kexec_stack, image->start, image, page_address(image->control_code_page), -#ifdef CONFIG_PPC_STD_MMU mmu_hash_ops.hpte_clear_all #else NULL #endif ); /* NOTREACHED */ } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 7a8519052b14..cb195157b318 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -667,7 +667,7 @@ _GLOBAL(kexec_sequence) mr r12,r27 #endif mtctr r12 - bctrl /* ppc_md.hpte_clear_all(void); */ + bctrl /* mmu_hash_ops.hpte_clear_all(void); */ #endif /* !CONFIG_PPC_BOOK3E */ /* -- cgit From 84b62c72faa197a5c9b75ee93527add31695fb32 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:59 +1000 Subject: powerpc: Ensure that ppc_md is empty before probing for machine type Anything in there will be overwritten, so it helps catch nasty bugs if we check that it's indeed full of NULL's before we do so.
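For example (hypothetical code, illustration only), an early assignment like

	/* would run before probe_machine() */
	ppc_md.power_save = my_idle;	/* silently clobbered by the ppc_md copy */

would be lost when the matched machdep_calls structure is copied over ppc_md; the check below turns that silent clobber into an error message.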
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2a3564caafd3..b8ee1c867ee3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -575,6 +575,7 @@ void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; + unsigned int i; /* * Iterate all ppc_md structures until we find the proper * */ DBG("Probing machine type ...\n"); + /* + * Check ppc_md is empty, if not we have a bug, ie, we setup an + * entry before probe_machine() which will be overwritten + */ + for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) { + if (((void **)&ppc_md)[i]) { + printk(KERN_ERR "Entry %d in ppc_md non empty before" + " machine probe !\n", i); + } + } + for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { -- cgit From 406b0b6ae3fcd5c7946a68a9e43b470c79d292a2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:00 +1000 Subject: powerpc/64: Move 64-bit probe_machine() to later in the boot process We no longer need the machine type that early, so we can move probe_machine() to after the device-tree has been expanded. This will allow further consolidation. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4ffd090633de..883d527899a7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -301,9 +301,6 @@ void __init early_setup(unsigned long dt_ptr) setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); - /* Probe the machine type */ - probe_machine(); - /* * Configure exception handlers. This include setting up trampolines * if needed, setting exception endian mode, etc... @@ -511,6 +508,9 @@ void __init setup_system(void) */ check_for_initrd(); + /* Probe the machine type */ + probe_machine(); + /* * Do some platform specific early initializations, that includes * setting up the hash table pointers. It also sets up some interrupt-mapping -- cgit From 565713840445b7ccafb28dc1230d57d40bcb42a5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:05 +1000 Subject: powerpc: Move 32-bit probe() machine to later in the boot process This converts all the 32-bit platforms to use the expanded device-tree which is a pretty mechanical change. Unlike 64-bit, the 32-bit kernel didn't rely on platform initializations to setup the MMU since it sets it up entirely before probe_machine() so the move has comparatively fewer consequences though it's a bigger patch.
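The mechanical part of the conversion is switching each platform's probe() from the flat-tree helpers to the regular OF API, along these lines (illustrative only, the compatible string is a stand-in):

-	return of_flat_dt_is_compatible(of_get_flat_dt_root(), "acme,board");
+	return of_machine_is_compatible("acme,board");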
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 3f0aca2b7f63..e7bb4e76896a 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -113,23 +113,7 @@ notrace void __init machine_init(u64 dt_ptr) early_init_mmu(); - probe_machine(); - setup_kdump_trampoline(); - -#ifdef CONFIG_6xx - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = ppc6xx_idle; -#endif - -#ifdef CONFIG_E500 - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = e500_idle; -#endif - if (ppc_md.progress) - ppc_md.progress("id mach(): done", 0x200); } /* Checks "l2cr=xxxx" command-line option */ @@ -249,6 +233,21 @@ static void __init exc_lvl_early_init(void) #define exc_lvl_early_init() #endif +static void setup_power_save(void) +{ +#ifdef CONFIG_6xx + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; +#endif + +#ifdef CONFIG_E500 + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = e500_idle; +#endif +} + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -260,6 +259,10 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); check_for_initrd(); + probe_machine(); + + setup_power_save(); + if (ppc_md.init_early) ppc_md.init_early(); -- cgit From f2d576948d6cec16e4aae201d738c4f22039a551 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:06 +1000 Subject: powerpc: Get rid of ppc_md.init_early() It is now called right after platform probe, so the probe function can just do the job. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 3 --- arch/powerpc/kernel/setup_64.c | 8 -------- 2 files changed, 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e7bb4e76896a..22347e87c354 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -263,9 +263,6 @@ void __init setup_arch(char **cmdline_p) setup_power_save(); - if (ppc_md.init_early) - ppc_md.init_early(); - find_legacy_serial_ports(); smp_setup_cpu_maps(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 883d527899a7..8b9768a97387 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -511,14 +511,6 @@ void __init setup_system(void) /* Probe the machine type */ probe_machine(); - /* - * Do some platform specific early initializations, that includes - * setting up the hash table pointers. It also sets up some interrupt-mapping - * related options that will be used by finish_device_tree() - */ - if (ppc_md.init_early) - ppc_md.init_early(); - /* * We can discover serial ports now since the above did setup the * hash table management for us, thus ioremap works. 
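Illustratively (hypothetical platform, sketch only), a probe() can now absorb its old hook directly:

	static int __init acme_probe(void)
	{
		if (!of_machine_is_compatible("acme,board"))
			return 0;
		acme_setup_early();	/* was ppc_md.init_early */
		return 1;
	}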
We do that early -- cgit From bf1b61fb574bfe13ab71347389a2ab16f673d24f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:07 +1000 Subject: powerpc/64: Move the boot time info banner to a separate function Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 64 ++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8b9768a97387..2395a88b1142 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -476,6 +476,37 @@ static void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } +static __init void print_system_info(void) +{ + pr_info("-----------------------------------------------------\n"); + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + + if (ppc64_caches.dline_size != 0x80) + pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); + if (ppc64_caches.iline_size != 0x80) + pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + + pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); + +#ifdef CONFIG_PPC_STD_MMU_64 + if (htab_address) + pr_info("htab_address = 0x%p\n", htab_address); + + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif + + if (PHYSICAL_START > 0) + pr_info("physical_start = 0x%llx\n", + (unsigned long long)PHYSICAL_START); + pr_info("-----------------------------------------------------\n"); +} /* * Do some initial setup of the system. The parameters are those which @@ -543,37 +574,8 @@ void __init setup_system(void) smp_release_cpus(); #endif - pr_info("Starting Linux %s %s\n", init_utsname()->machine, - init_utsname()->version); - - pr_info("-----------------------------------------------------\n"); - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); - - if (ppc64_caches.dline_size != 0x80) - pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); - if (ppc64_caches.iline_size != 0x80) - pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - - pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, - cur_cpu_spec->cpu_user_features2); - pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif - - if (PHYSICAL_START > 0) - pr_info("physical_start = 0x%llx\n", - (unsigned long long)PHYSICAL_START); - pr_info("-----------------------------------------------------\n"); + /* Print various info about the machine that has been gathered so far. 
*/ + print_system_info(); DBG(" <- setup_system()\n"); } -- cgit From 9df549afeab4ea968b6d83cf9d7a1e3c577a9846 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:08 +1000 Subject: powerpc/64: Move setting of {i,d}cache_bsize to initialize_cache_info() Also remove the completely obsolete comment. We *do* look in the device-tree. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2395a88b1142..aca215d77fe3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -473,6 +473,10 @@ static void __init initialize_cache_info(void) } } + /* For use by binfmt_elf */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; + DBG(" <- initialize_cache_info()\n"); } @@ -691,15 +695,6 @@ void __init setup_arch(char **cmdline_p) { *cmdline_p = boot_command_line; - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. - */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; - - /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); -- cgit From fa745a129cae93ca5d871ebac2a8f6c27ae3fbf2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:09 +1000 Subject: powerpc/64: Move the content of setup_system() to setup_arch() And kill setup_system(). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_64.S | 5 +- arch/powerpc/kernel/setup_64.c | 134 +++++++++++++++++++---------------------- 2 files changed, 63 insertions(+), 76 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 064cd9397836..f765b0434731 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -941,7 +941,7 @@ start_here_multiplatform: mtspr SPRN_SRR1,r4 RFI b . /* prevent speculative execution */ - + /* This is where all platforms converge execution */ start_here_common: /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) - /* Do more system initializations in virtual mode */ - bl setup_system - /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index aca215d77fe3..61c3e6c42262 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -512,78 +512,6 @@ static __init void print_system_info(void) pr_info("-----------------------------------------------------\n"); } -/* - * Do some initial setup of the system. The parameters are those which - * were passed in from the bootloader. - */ -void __init setup_system(void) -{ - DBG(" -> setup_system()\n"); - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree.
- */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* Probe the machine type */ - probe_machine(); - - /* - * We can discover serial ports now since the above did setup the - * hash table management for us, thus ioremap works. We do that early - * so that further code can be debugged - */ - find_legacy_serial_ports(); - - /* - * Register early console - */ - register_early_udbg_console(); - - /* - * Initialize xmon - */ - xmon_setup(); - - smp_setup_cpu_maps(); - check_smt_enabled(); - setup_tlb_core_data(); - - /* - * Freescale Book3e parts spin in a loop provided by firmware, - * so smp_release_cpus() does nothing for them - */ -#if defined(CONFIG_SMP) - /* Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - /* Print various info about the machine that has been gathered so far. */ - print_system_info(); - - DBG(" <- setup_system()\n"); -} - /* This returns the limit below which memory accesses to the linear * mapping are guarnateed not to cause a TLB or SLB miss. This is * used to allocate interrupt or emergency stacks for which our @@ -695,6 +623,68 @@ void __init setup_arch(char **cmdline_p) { *cmdline_p = boot_command_line; + /* + * Unflatten the device-tree passed by prom_init or kexec + */ + unflatten_device_tree(); + + /* + * Fill the ppc64_caches & systemcfg structures with informations + * retrieved from the device-tree. + */ + initialize_cache_info(); + +#ifdef CONFIG_PPC_RTAS + /* + * Initialize RTAS if available + */ + rtas_initialize(); +#endif /* CONFIG_PPC_RTAS */ + + /* + * Check if we have an initrd provided via the device-tree + */ + check_for_initrd(); + + /* Probe the machine type */ + probe_machine(); + + /* + * We can discover serial ports now since the above did setup the + * hash table management for us, thus ioremap works. We do that early + * so that further code can be debugged + */ + find_legacy_serial_ports(); + + /* + * Register early console + */ + register_early_udbg_console(); + + /* + * Initialize xmon + */ + xmon_setup(); + + smp_setup_cpu_maps(); + check_smt_enabled(); + setup_tlb_core_data(); + + /* + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them + */ +#if defined(CONFIG_SMP) + /* + * Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids + */ + smp_release_cpus(); +#endif + + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); + /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); -- cgit From 8f212cb26fc74dcf7b8c5c3dbcf3c3741990c31d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:10 +1000 Subject: powerpc/32: Move cache info inits to a separate function Matches 64-bit. 
Also move the call to the same spot as ppc64 Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 22347e87c354..5457911d662d 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -248,6 +248,21 @@ static void setup_power_save(void) #endif } +static __init void initialize_cache_info(void) +{ + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; + if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) + ucache_bsize = icache_bsize = dcache_bsize; +} + + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -257,6 +272,7 @@ void __init setup_arch(char **cmdline_p) loops_per_jiffy = 500000000 / HZ; unflatten_device_tree(); + initialize_cache_info(); check_for_initrd(); probe_machine(); @@ -272,17 +288,6 @@ void __init setup_arch(char **cmdline_p) xmon_setup(); - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. - */ - dcache_bsize = cur_cpu_spec->dcache_bsize; - icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) - ucache_bsize = icache_bsize = dcache_bsize; - init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; -- cgit From e39afba3aa11f7088ddc00d37ab34a85d960a76e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:11 +1000 Subject: powerpc: Re-order the call to smp_setup_cpu_maps() It makes more sense to do it before initializing xmon() as xmon might use the info in there. We do want to register the console early though in case we want some functioning printk's in the cpu map setup. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 4 ++-- arch/powerpc/kernel/setup_64.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5457911d662d..58674b602149 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -281,11 +281,11 @@ void __init setup_arch(char **cmdline_p) find_legacy_serial_ports(); - smp_setup_cpu_maps(); - /* Register early console */ register_early_udbg_console(); + smp_setup_cpu_maps(); + xmon_setup(); if (ppc_md.panic) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 61c3e6c42262..3fd59bca06f7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -661,12 +661,13 @@ void __init setup_arch(char **cmdline_p) */ register_early_udbg_console(); + smp_setup_cpu_maps(); + /* * Initialize xmon */ xmon_setup(); - smp_setup_cpu_maps(); check_smt_enabled(); setup_tlb_core_data(); -- cgit From f7b9ebb79e90b19bf6a2cb805a536258437fc3fa Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:12 +1000 Subject: powerpc: Re-order setup_panic() Do it right after probe_machine() since it's about testing ppc_md, and put the test in the common code.
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 2 ++ arch/powerpc/kernel/setup_32.c | 5 ++--- arch/powerpc/kernel/setup_64.c | 5 ++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b8ee1c867ee3..ca9255e3b763 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -688,6 +688,8 @@ static struct notifier_block ppc_panic_block = { void __init setup_panic(void) { + if (!ppc_md.panic) + return; atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 58674b602149..6247a3a4fd4b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -277,6 +277,8 @@ void __init setup_arch(char **cmdline_p) probe_machine(); + setup_panic(); + setup_power_save(); find_legacy_serial_ports(); @@ -288,9 +290,6 @@ void __init setup_arch(char **cmdline_p) xmon_setup(); - if (ppc_md.panic) - setup_panic(); - init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 3fd59bca06f7..f55c25dff02c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -649,6 +649,8 @@ void __init setup_arch(char **cmdline_p) /* Probe the machine type */ probe_machine(); + setup_panic(); + /* * We can discover serial ports now since the above did setup the * hash table management for us, thus ioremap works. We do that early @@ -696,9 +698,6 @@ void __init setup_arch(char **cmdline_p) */ reserve_hugetlb_gpages(); - if (ppc_md.panic) - setup_panic(); - klp_init_thread_info(&init_thread_info); init_mm.start_code = (unsigned long)_stext; -- cgit From 009776baa18448b223be73ac74912fef7e17b9e2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:07:50 +1000 Subject: powerpc/64: Make a few boot functions __init Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f55c25dff02c..fba96ada3012 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -198,7 +198,7 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void fixup_boot_paca(void) +static void __init fixup_boot_paca(void) { /* The boot cpu is started */ get_paca()->cpu_start = 1; @@ -206,7 +206,7 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } -static void configure_exceptions(void) +static void __init configure_exceptions(void) { /* * Setup the trampolines from the lowmem exception vectors @@ -517,7 +517,7 @@ static __init void print_system_info(void) * used to allocate interrupt or emergency stacks for which our * exception entry path doesn't deal with being interrupted. 
*/ -static u64 safe_stack_limit(void) +static __init u64 safe_stack_limit(void) { #ifdef CONFIG_PPC_BOOK3E /* Freescale BookE bolts the entire linear mapping */ -- cgit From b1923caa6e641f3d0a93b5d045aef67ded5aef67 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:07:51 +1000 Subject: powerpc: Merge 32-bit and 64-bit setup_arch() There is little enough difference now. mpe: Add a/p/k/setup.h to contain the prototypes and empty versions of functions we need, rather than using weak functions. Add a few other empty versions to avoid as many #ifdefs as possible in the code. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 173 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup.h | 58 ++++++++++++ arch/powerpc/kernel/setup_32.c | 65 +------------- arch/powerpc/kernel/setup_64.c | 175 ++----------------------------------- 4 files changed, 242 insertions(+), 229 deletions(-) create mode 100644 arch/powerpc/kernel/setup.h (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ca9255e3b763..714b4ba7ab86 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,12 @@ #include #include #include +#include +#include +#include +#include + +#include "setup.h" #ifdef DEBUG #include @@ -758,3 +765,169 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) pdev->dev.dma_mask = &pdev->archdata.dma_mask; set_dma_ops(&pdev->dev, &dma_direct_ops); } + +static __init void print_system_info(void) +{ + pr_info("-----------------------------------------------------\n"); +#ifdef CONFIG_PPC_STD_MMU_64 + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + pr_info("Hash_size = 0x%lx\n", Hash_size); +#endif + pr_info("phys_mem_size = 0x%llx\n", + (unsigned long long)memblock_phys_mem_size()); + + pr_info("dcache_bsize = 0x%x\n", dcache_bsize); + pr_info("icache_bsize = 0x%x\n", icache_bsize); + if (ucache_bsize != 0) + pr_info("ucache_bsize = 0x%x\n", ucache_bsize); + + pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", + (unsigned long)CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", + (unsigned long)CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", + cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); +#ifdef CONFIG_PPC64 + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); +#endif + +#ifdef CONFIG_PPC_STD_MMU_64 + if (htab_address) + pr_info("htab_address = 0x%p\n", htab_address); + if (htab_hash_mask) + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + if (Hash) + pr_info("Hash = 0x%p\n", Hash); + if (Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +#endif + + if (PHYSICAL_START > 0) + pr_info("physical_start = 0x%llx\n", + (unsigned long long)PHYSICAL_START); + pr_info("-----------------------------------------------------\n"); +} + +/* + * Called into from start_kernel this initializes memblock, which is used + * to manage page allocation until mem_init is called.
+ */ +void __init setup_arch(char **cmdline_p) +{ + *cmdline_p = boot_command_line; + + /* Set a half-reasonable default so udelay does something sensible */ + loops_per_jiffy = 500000000 / HZ; + + /* Unflatten the device-tree passed by prom_init or kexec */ + unflatten_device_tree(); + + /* + * Initialize cache line/block info from device-tree (on ppc64) or + * just cputable (on ppc32). + */ + initialize_cache_info(); + + /* Initialize RTAS if available. */ + rtas_initialize(); + + /* Check if we have an initrd provided via the device-tree. */ + check_for_initrd(); + + /* Probe the machine type, establish ppc_md. */ + probe_machine(); + + /* Setup panic notifier if requested by the platform. */ + setup_panic(); + + /* + * Configure ppc_md.power_save (ppc32 only, 64-bit machines do + * it from their respective probe() function. + */ + setup_power_save(); + + /* Discover standard serial ports. */ + find_legacy_serial_ports(); + + /* Register early console with the printk subsystem. */ + register_early_udbg_console(); + + /* Setup the various CPU maps based on the device-tree. */ + smp_setup_cpu_maps(); + + /* Initialize xmon. */ + xmon_setup(); + + /* Check the SMT related command line arguments (ppc64). */ + check_smt_enabled(); + + /* On BookE, setup per-core TLB data structures. */ + setup_tlb_core_data(); + + /* + * Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids. + * + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them. + */ +#ifdef CONFIG_SMP + smp_release_cpus(); +#endif + + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); + + /* Reserve large chunks of memory for use by CMA for KVM. */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + + klp_init_thread_info(&init_thread_info); + + init_mm.start_code = (unsigned long)_stext; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; +#ifdef CONFIG_PPC_64K_PAGES + init_mm.context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&init_mm.context); +#endif + irqstack_early_init(); + exc_lvl_early_init(); + emergency_stack_init(); + + initmem_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + if (ppc_md.setup_arch) + ppc_md.setup_arch(); + + paging_init(); + + /* Initialize the MMU context management stuff. */ + mmu_context_init(); + +#ifdef CONFIG_PPC64 + /* Interrupt code needs to be 64K-aligned. */ + if ((unsigned long)_stext & 0xffff) + panic("Kernelbase not 64K-aligned (0x%lx)!\n", + (unsigned long)_stext); +#endif +} diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h new file mode 100644 index 000000000000..cfba134b3024 --- /dev/null +++ b/arch/powerpc/kernel/setup.h @@ -0,0 +1,58 @@ +/* + * Prototypes for functions that are shared between setup_(32|64|common).c + * + * Copyright 2016 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __ARCH_POWERPC_KERNEL_SETUP_H +#define __ARCH_POWERPC_KERNEL_SETUP_H + +void initialize_cache_info(void); +void irqstack_early_init(void); + +#ifdef CONFIG_PPC32 +void setup_power_save(void); +#else +static inline void setup_power_save(void) { }; +#endif + +#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) +void check_smt_enabled(void); +#else +static inline void check_smt_enabled(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +void setup_tlb_core_data(void); +#else +static inline void setup_tlb_core_data(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +void exc_lvl_early_init(void); +#else +static inline void exc_lvl_early_init(void) { }; +#endif + +#ifdef CONFIG_PPC64 +void emergency_stack_init(void); +#else +static inline void emergency_stack_init(void) { }; +#endif + +/* + * Having this in kvm_ppc.h makes include dependencies too + * tricky to solve for setup-common.c so have it here. + */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvm_cma_reserve(void); +#else +static inline void kvm_cma_reserve(void) { }; +#endif + +#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 6247a3a4fd4b..00f57754407e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #define DBG(fmt...) @@ -191,7 +190,7 @@ int __init ppc_init(void) arch_initcall(ppc_init); -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { unsigned int i; @@ -206,7 +205,7 @@ static void __init irqstack_early_init(void) } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; @@ -229,11 +228,9 @@ static void __init exc_lvl_early_init(void) #endif } } -#else -#define exc_lvl_early_init() #endif -static void setup_power_save(void) +void __init setup_power_save(void) { #ifdef CONFIG_6xx if (cpu_has_feature(CPU_FTR_CAN_DOZE) || @@ -248,7 +245,7 @@ static void setup_power_save(void) #endif } -static __init void initialize_cache_info(void) +__init void initialize_cache_info(void) { /* * Set cache line size based on type of cpu as a default. 
@@ -261,57 +258,3 @@ static __init void initialize_cache_info(void) if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ucache_bsize = icache_bsize = dcache_bsize; } - - -/* Warning, IO base is not yet inited */ -void __init setup_arch(char **cmdline_p) -{ - *cmdline_p = boot_command_line; - - /* so udelay does something sensible, assume <= 1000 bogomips */ - loops_per_jiffy = 500000000 / HZ; - - unflatten_device_tree(); - initialize_cache_info(); - check_for_initrd(); - - probe_machine(); - - setup_panic(); - - setup_power_save(); - - find_legacy_serial_ports(); - - /* Register early console */ - register_early_udbg_console(); - - smp_setup_cpu_maps(); - - xmon_setup(); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; - - exc_lvl_early_init(); - - irqstack_early_init(); - - initmem_init(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); -} diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fba96ada3012..d8216aed22b7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -64,12 +63,10 @@ #include #include #include -#include #include -#include -#include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -100,7 +97,7 @@ int icache_bsize; int ucache_bsize; #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) -static void setup_tlb_core_data(void) +void __init setup_tlb_core_data(void) { int cpu; @@ -133,10 +130,6 @@ static void setup_tlb_core_data(void) } } } -#else -static void setup_tlb_core_data(void) -{ -} #endif #ifdef CONFIG_SMP @@ -144,7 +137,7 @@ static void setup_tlb_core_data(void) static char *smt_enabled_cmdline; /* Look for ibm,smt-enabled OF option */ -static void check_smt_enabled(void) +void __init check_smt_enabled(void) { struct device_node *dn; const char *smt_option; @@ -193,8 +186,6 @@ static int __init early_smt_enabled(char *p) } early_param("smt-enabled", early_smt_enabled); -#else -#define check_smt_enabled() #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ @@ -408,7 +399,7 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ -static void __init initialize_cache_info(void) +void __init initialize_cache_info(void) { struct device_node *np; unsigned long num_cpus = 0; @@ -480,38 +471,6 @@ static void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } -static __init void print_system_info(void) -{ - pr_info("-----------------------------------------------------\n"); - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); - - if (ppc64_caches.dline_size != 0x80) - pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); - if (ppc64_caches.iline_size != 0x80) - pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - - pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - 
pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, - cur_cpu_spec->cpu_user_features2); - pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif - - if (PHYSICAL_START > 0) - pr_info("physical_start = 0x%llx\n", - (unsigned long long)PHYSICAL_START); - pr_info("-----------------------------------------------------\n"); -} - /* This returns the limit below which memory accesses to the linear * mapping are guarnateed not to cause a TLB or SLB miss. This is * used to allocate interrupt or emergency stacks for which our @@ -533,7 +492,7 @@ static __init u64 safe_stack_limit(void) #endif } -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { u64 limit = safe_stack_limit(); unsigned int i; @@ -553,7 +512,7 @@ static void __init irqstack_early_init(void) } #ifdef CONFIG_PPC_BOOK3E -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i; unsigned long sp; @@ -575,8 +534,6 @@ static void __init exc_lvl_early_init(void) if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) patch_exception(0x040, exc_debug_debug_book3e); } -#else -#define exc_lvl_early_init() #endif /* @@ -584,7 +541,7 @@ static void __init exc_lvl_early_init(void) * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. */ -static void __init emergency_stack_init(void) +void __init emergency_stack_init(void) { u64 limit; unsigned int i; @@ -615,124 +572,6 @@ static void __init emergency_stack_init(void) } } -/* - * Called into from start_kernel this initializes memblock, which is used - * to manage page allocation until mem_init is called. - */ -void __init setup_arch(char **cmdline_p) -{ - *cmdline_p = boot_command_line; - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree. - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* Probe the machine type */ - probe_machine(); - - setup_panic(); - - /* - * We can discover serial ports now since the above did setup the - * hash table management for us, thus ioremap works. We do that early - * so that further code can be debugged - */ - find_legacy_serial_ports(); - - /* - * Register early console - */ - register_early_udbg_console(); - - smp_setup_cpu_maps(); - - /* - * Initialize xmon - */ - xmon_setup(); - - check_smt_enabled(); - setup_tlb_core_data(); - - /* - * Freescale Book3e parts spin in a loop provided by firmware, - * so smp_release_cpus() does nothing for them - */ -#if defined(CONFIG_SMP) - /* - * Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - /* Print various info about the machine that has been gathered so far. */ - print_system_info(); - - /* Reserve large chunks of memory for use by CMA for KVM */ - kvm_cma_reserve(); - - /* - * Reserve any gigantic pages requested on the command line. 
- * memblock needs to have been initialized by the time this is - * called since this will reserve memory. - */ - reserve_hugetlb_gpages(); - - klp_init_thread_info(&init_thread_info); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; -#ifdef CONFIG_PPC_64K_PAGES - init_mm.context.pte_frag = NULL; -#endif -#ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); -#endif - irqstack_early_init(); - exc_lvl_early_init(); - emergency_stack_init(); - - initmem_init(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); - - /* Interrupt code needs to be 64K-aligned */ - if ((unsigned long)_stext & 0xffff) - panic("Kernelbase not 64K-aligned (0x%lx)!\n", - (unsigned long)_stext); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () -- cgit From 9d636109511a000882f8dff4eaafa874eec5ece8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jul 2016 14:48:30 +1000 Subject: powerpc/ftrace: Separate the heuristics for checking call sites In __ftrace_make_nop() (the 64-bit version), we have code to deal with two ftrace ABIs. There is the original ABI, which looks mostly like a function call, and then the mprofile-kernel ABI which is just a branch. The code tries to handle both cases, by looking for the presence of a load to restore the TOC pointer (PPC_INST_LD_TOC). If we detect the TOC load, we assume the call site is for an mcount() call using the old ABI. That means we patch the mcount() call with a b +8, to branch over the TOC load. However if the kernel was built with mprofile-kernel, then there will never be a call site using the original ftrace ABI. If for some reason we do see a TOC load, then it's there for a good reason, and we should not jump over it. So split the code, using the existing CC_USING_MPROFILE_KERNEL. Kernels built with mprofile-kernel will only look for, and expect, the new ABI, and similarly for the original ABI. 
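To make the two call-site shapes concrete before the diff below: with the original ABI the branch is followed by a TOC restore (ld r2,XX(r1)), while with mprofile-kernel the branch is preceded by mflr r0 (or a store of LR). The following standalone C sketch models the split heuristic; it is illustrative only — the opcode constants approximate the real PPC_INST_* values and the helper names are invented for the example, not taken from ftrace.c.

#include <stdint.h>
#include <stdio.h>

/* Placeholder encodings; the kernel uses its own PPC_INST_* definitions. */
#define INST_MFLR    0x7c0802a6u  /* mflr r0 */
#define INST_STD_LR  0xf8010010u  /* std r0,16(r1) */
#define INST_LD_TOC  0xe8410018u  /* ld r2,24(r1) */
#define INST_NOP     0x60000000u  /* nop */

/* mprofile-kernel: "bl _mcount" must be preceded by mflr r0 or a save of LR */
static int is_mprofile_callsite(uint32_t prev)
{
	return prev == INST_MFLR || prev == INST_STD_LR;
}

/* original ABI: the branch must be followed by the TOC restore */
static int is_original_callsite(uint32_t next)
{
	return next == INST_LD_TOC;
}

int main(void)
{
	/* mprofile-kernel style site: mflr r0; bl _mcount; (unrelated) nop */
	uint32_t site[3] = { INST_MFLR, 0x48000001u /* bl, placeholder */, INST_NOP };

	printf("mprofile-kernel site? %d\n", is_mprofile_callsite(site[0])); /* 1 */
	printf("original ABI site?    %d\n", is_original_callsite(site[2])); /* 0 */
	return 0;
}

A kernel built with CC_USING_MPROFILE_KERNEL only ever applies the first check; the point of the commit is that the two checks are compiled mutually exclusively rather than folded into one heuristic.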
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ftrace.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 7af6c4de044b..cc52d9795f88 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -144,6 +144,21 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } +#ifdef CC_USING_MPROFILE_KERNEL + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + + if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { + pr_err("Unexpected instruction %08x around bl _mcount\n", op); + return -EINVAL; + } +#else /* * Our original call site looks like: * @@ -170,24 +185,10 @@ __ftrace_make_nop(struct module *mod, } if (op != PPC_INST_LD_TOC) { - unsigned int inst; - - if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ - if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { - pr_err("Unexpected instructions around bl _mcount\n" "when enabling dynamic ftrace!\t" "(%08x,bl,%08x)\n", inst, op); - return -EINVAL; - } - - /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + return -EINVAL; } +#endif /* CC_USING_MPROFILE_KERNEL */ if (patch_instruction((unsigned int *)ip, pop)) { pr_err("Patching NOP failed.\n"); -- cgit From 31278b17a0dfed3014786b623fd07ee110b801da Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jul 2016 14:48:31 +1000 Subject: powerpc/modules: Never restore r2 for a mprofile-kernel style mcount() call In the module loader we process relocations, and for long jumps we generate trampolines (aka stubs). At the call site for one of these trampolines we usually need to generate a load instruction to restore the TOC pointer into r2. There is one exception, however: calls to mcount() using the mprofile-kernel ABI. They handle the TOC inside the stub, and so for them we do not generate a TOC load. The bug is in how the code in restore_r2() decides if it needs to generate the TOC load. It does so by looking for a nop following the branch, and if it sees a nop, it replaces it with the load. In general the compiler has no reason to generate a nop following the mcount() call, so that check works OK. However, if we combine a jump label at the start of a function with an early return, such that GCC applies the shrink-wrapping optimisation, we can end up with an mcount call followed immediately by a nop. But the nop is not there for a TOC load; it is for the jump label. That confuses restore_r2() into replacing the jump label nop with a TOC load, which in turn confuses ftrace into replacing the mcount call with a b +8 (fixed in the previous commit). The end result is that we jump over the jump label; if the label was supposed to trigger an early return, we incorrectly run the body of the function. We have seen this in practice with some yet-to-be-merged patches that use jump labels more extensively. The fix is relatively simple: in restore_r2() we check for an mprofile-kernel style mcount() call first, before looking for the presence of a nop.
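The failure mode is easiest to see on the instruction sequence itself. Below is an illustrative, self-contained C sketch (assumed encodings and invented helper names — not module_64.c) showing why the mcount check must come before the nop check when shrink-wrapping leaves a jump-label nop right after the branch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INST_NOP        0x60000000u  /* PowerPC nop */
#define INST_BL_MCOUNT  0x48000001u  /* stands in for "bl _mcount" */

static bool is_mcount_callsite(uint32_t insn)
{
	return insn == INST_BL_MCOUNT;
}

/* Should the nop slot after this branch be patched with a TOC restore? */
static bool needs_toc_restore(const uint32_t *site)
{
	/* Check the branch first: an mprofile-kernel mcount call never
	 * needs a TOC restore, even if a nop happens to follow (it may
	 * be a jump-label nop left behind by shrink-wrapping). */
	if (is_mcount_callsite(site[0]))
		return false;

	/* Only now is a following nop evidence of a TOC-restore slot. */
	return site[1] == INST_NOP;
}

int main(void)
{
	/* bl _mcount immediately followed by a jump-label nop */
	uint32_t tricky[2] = { INST_BL_MCOUNT, INST_NOP };

	/* Prints 0: the jump-label nop is left alone. With the checks in
	 * the old order, the nop alone would have answered "yes". */
	printf("patch TOC restore? %d\n", needs_toc_restore(tricky));
	return 0;
}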
Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f703f343358e..183368e008cf 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -494,9 +494,10 @@ static bool is_early_mcount_callsite(u32 *instruction) restore r2. */ static int restore_r2(u32 *instruction, struct module *me) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; + if (*instruction != PPC_INST_NOP) { - if (is_early_mcount_callsite(instruction - 1)) - return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; -- cgit From dd57023747e33572b31867f890b0d99f55b5cc2f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 20 May 2016 04:41:34 +1000 Subject: powerpc: Improve comment explaining why we modify VRSAVE The comment explaining why we modify VRSAVE is misleading; glibc does rely on the behaviour. Update the comment. Signed-off-by: Anton Blanchard Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vector.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 1c2e7a343bf5..616a6d854638 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -70,10 +70,11 @@ _GLOBAL(load_up_altivec) MTMSRD(r5) /* enable use of AltiVec now */ isync - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's + /* + * While userspace in general ignores VRSAVE, glibc uses it as a boolean + * to optimise userspace context save/restore. Whenever we take an + * altivec unavailable exception we must set VRSAVE to something non + * zero. Set it to all 1s. See also the programming note in the ISA. */ mfspr r4,SPRN_VRSAVE cmpwi 0,r4,0 -- cgit
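For background on the behaviour the new comment documents: the ISA's programming note lets userspace maintain VRSAVE as a mask of live vector registers, and glibc treats it as a simple boolean, skipping the AltiVec state entirely when it reads as zero — which is why the kernel must hand userspace a non-zero value. A rough, PowerPC-only sketch of that pattern (assumed code, not glibc's implementation):

#include <stdint.h>

/* VRSAVE is SPR 256 */
static inline uint32_t read_vrsave(void)
{
	uint32_t v;
	__asm__ volatile("mfspr %0, 256" : "=r"(v));
	return v;
}

void save_vector_state(void *ctx)
{
	/* VRSAVE == 0 is taken to mean "no live vector state", so the
	 * expensive save of v0-v31 and VSCR can be skipped. This is why
	 * load_up_altivec above guarantees a non-zero value. */
	if (read_vrsave() == 0)
		return;

	/* ... store v0-v31 and VSCR into ctx ... */
	(void)ctx;
}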