From 5490125d77a43016b26f629d4b485e2c62172551 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:59 +0200 Subject: watchdog/core: Remove broken suspend/resume interfaces This interface has several issues: - It's causing recursive locking of the hotplug lock. - It's complete overkill to teardown all threads and then recreate them The same can be achieved with the simple hardlockup_detector_perf_stop / restart() interfaces. The abuse from the busy looping poweroff() loop of PARISC has been solved as well. Remove the cruft. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.487537732@linutronix.de Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/watchdog.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 2f6eadd9408d..5ded171f02d6 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu) if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) return 0; - if (watchdog_suspended) - return 0; - if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; -- cgit From 6592ad2fcc8f15b4f99b36c1db7d9f65510c203b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:16 +0200 Subject: watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage Both the perf reconfiguration and the powerpc watchdog_nmi_reconfigure() need to be done in two steps. 1) Stop all NMIs 2) Read the new parameters and start NMIs Right now watchdog_nmi_reconfigure() is a combination of both. To allow a clean reconfiguration add a 'run' argument and split the functionality in powerpc. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170912194147.862865570@linutronix.de Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/watchdog.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 5ded171f02d6..291af79a9826 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -355,17 +355,18 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_reconfigure(void) +void watchdog_nmi_reconfigure(bool run) { int cpu; - watchdog_calc_timeouts(); - - for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); - - for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + if (!run) { + for_each_cpu(cpu, &wd_cpus_enabled) + stop_wd_on_cpu(cpu); + } else { + watchdog_calc_timeouts(); + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) + start_wd_on_cpu(cpu); + } } /* -- cgit From ab5fe3ff38ff9653490910cc71dbbedc95a86e41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:23 +0200 Subject: watchdog/hardlockup: Clean up hotplug locking mess All watchdog thread related functions are delegated to the smpboot thread infrastructure, which handles serialization against CPU hotplug correctly. The sysctl interface is completely decoupled from anything which requires CPU hotplug protection. No need to protect the sysctl writes against cpu hotplug anymore. Remove it and add the now required protection to the powerpc arch_nmi_watchdog implementation. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170912194148.418497420@linutronix.de Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/watchdog.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 291af79a9826..dfb067764480 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -359,6 +359,7 @@ void watchdog_nmi_reconfigure(bool run) { int cpu; + cpus_read_lock(); if (!run) { for_each_cpu(cpu, &wd_cpus_enabled) stop_wd_on_cpu(cpu); @@ -367,6 +368,7 @@ void watchdog_nmi_reconfigure(bool run) for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) start_wd_on_cpu(cpu); } + cpus_read_unlock(); } /* -- cgit From d8bd9f3f0925d22726de159531bfe3774b5cacc6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 22 Sep 2017 13:32:21 +1000 Subject: powerpc: Handle MCE on POWER9 with only DSISR bit 30 set On POWER9 DD2.1 and below, it's possible for a paste instruction to cause a Machine Check Exception (MCE) where only DSISR bit 30 (IBM 33) is set. This will result in the MCE handler seeing an unknown event, which triggers linux to crash. We change this by detecting unknown events caused by load/stores in the MCE handler and marking them as handled so that we no longer crash. An MCE that occurs like this is spurious, so we don't need to do anything in terms of servicing it. If there is something that needs to be serviced, the CPU will raise the MCE again with the correct DSISR so that it can be serviced properly. Signed-off-by: Michael Neuling Reviewed-by: Nicholas Piggin [mpe: Expand comment with details from change log, use normal bit #s] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b76ca198e09c..f523125b9d34 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -624,5 +624,18 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long __machine_check_early_realmode_p9(struct pt_regs *regs) { + /* + * On POWER9 DD2.1 and below, it's possible to get a machine check + * caused by a paste instruction where only DSISR bit 30 is set. This + * will result in the MCE handler seeing an unknown event and the kernel + * crashing. An MCE that occurs like this is spurious, so we don't need + * to do anything in terms of servicing it. If there is something that + * needs to be serviced, the CPU will raise the MCE again with the + * correct DSISR so that it can be serviced properly. So detect this + * case and mark it as handled. + */ + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x40000000) + return 1; + return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } -- cgit From bca73f595a566f0262967535bb5b2ea9c4271d9a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 28 Sep 2017 22:37:35 -0500 Subject: powerpc: Fix workaround for spurious MCE on POWER9 In the recent commit d8bd9f3f0925 ("powerpc: Handle MCE on POWER9 with only DSISR bit 30 set") I screwed up the bit number. It should be bit 25 (IBM bit 38). Fixes: d8bd9f3f0925 ("powerpc: Handle MCE on POWER9 with only DSISR bit 30 set") Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index f523125b9d34..72f153c6f3fa 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -626,7 +626,7 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) { /* * On POWER9 DD2.1 and below, it's possible to get a machine check - * caused by a paste instruction where only DSISR bit 30 is set. This + * caused by a paste instruction where only DSISR bit 25 is set. This * will result in the MCE handler seeing an unknown event and the kernel * crashing. An MCE that occurs like this is spurious, so we don't need * to do anything in terms of servicing it. If there is something that @@ -634,7 +634,7 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) * correct DSISR so that it can be serviced properly. So detect this * case and mark it as handled. */ - if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x40000000) + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) return 1; return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); -- cgit From 3b7af5c0fd9631762d1c4d7b4cee76f571dd3c2c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 27 Sep 2017 12:55:51 +0800 Subject: powerpc: Fix action argument for cpufeatures-based TLB flush Commit 41d0c2ecde19 ("powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9") introduced calls to __flush_tlb_power[89] from the cpufeatures code, specifying the number of sets to flush. However, these functions take an action argument, not a number of sets. This means we hit the BUG() in __flush_tlb_{206,300} when using cpufeatures-style configuration. This change passes TLB_INVAL_SCOPE_GLOBAL instead. Fixes: 41d0c2ecde19 ("powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Jeremy Kerr Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1df770e8cbe0..7275fed271af 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -102,10 +102,10 @@ static void cpufeatures_flush_tlb(void) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: - __flush_tlb_power8(POWER8_TLB_SETS); + __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL); break; case PVR_POWER9: - __flush_tlb_power9(POWER9_TLB_SETS_HASH); + __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL); break; default: pr_err("unknown CPU version for boot TLB flush\n"); -- cgit From 070e004912fed099263408bf2ff1bbc6926abe2e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 1 Oct 2017 16:33:03 +0200 Subject: powerpc/4xx: Fix compile error with 64K pages on 40x, 44x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mmu context on the 40x, 44x does not define pte_frag entry. This causes gcc abort the compilation due to: setup-common.c: In function ‘setup_arch’: setup-common.c:908: error: ‘mm_context_t’ has no ‘pte_frag’ This patch fixes the issue by removing the pte_frag initialization in setup-common.c. This is possible, because the compiler will do the initialization, since the mm_context is a sub struct of init_mm. init_mm is declared in mm_types.h as external linkage. According to C99 6.2.4.3: An object whose identifier is declared with external linkage [...] has static storage duration. C99 defines in 6.7.8.10 that: If an object that has static storage duration is not initialized explicitly, then: - if it has pointer type, it is initialized to a null pointer Fixes: b1923caa6e64 ("powerpc: Merge 32-bit and 64-bit setup_arch()") Signed-off-by: Christian Lamparter Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0ac741fae90e..2e3bc16d02b2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -904,9 +904,6 @@ void __init setup_arch(char **cmdline_p) #endif #endif -#ifdef CONFIG_PPC_64K_PAGES - init_mm.context.pte_frag = NULL; -#endif #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&init_mm); #endif -- cgit From 6b9dc4806b28214a4a260517e59439e0ac12a15e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Oct 2017 12:34:50 +0200 Subject: watchdog/core, powerpc: Replace watchdog_nmi_reconfigure() The recent cleanup of the watchdog code split watchdog_nmi_reconfigure() into two stages. One to stop the NMI and one to restart it after reconfiguration. That was done by adding a boolean 'run' argument to the code, which is functionally correct but not necessarily a piece of art. Replace it by two explicit functions: watchdog_nmi_stop() and watchdog_nmi_start(). Fixes: 6592ad2fcc8f ("watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage") Requested-by: Linus 'Nursing his pet-peeve' Torvalds Signed-off-by: Thomas 'Mopping up garbage' Gleixner Acked-by: Michael Ellerman Cc: Peter Zijlstra Cc: Don Zickus Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710021957480.2114@nanos --- arch/powerpc/kernel/watchdog.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dfb067764480..2673ec8bec00 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -355,19 +355,24 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_reconfigure(bool run) +void watchdog_nmi_stop(void) { int cpu; cpus_read_lock(); - if (!run) { - for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); - } else { - watchdog_calc_timeouts(); - for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); - } + for_each_cpu(cpu, &wd_cpus_enabled) + stop_wd_on_cpu(cpu); + cpus_read_unlock(); +} + +void watchdog_nmi_start(void) +{ + int cpu; + + cpus_read_lock(); + watchdog_calc_timeouts(); + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) + start_wd_on_cpu(cpu); cpus_read_unlock(); } -- cgit From e31d6883f21c1cdfe5bc64e28411f8a92b783fde Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Oct 2017 16:37:53 +0200 Subject: watchdog/core, powerpc: Lock cpus across reconfiguration Instead of dropping the cpu hotplug lock after stopping NMI watchdog and threads and reaquiring for restart, the code and the protection rules become more obvious when holding cpu hotplug lock across the full reconfiguration. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Acked-by: Michael Ellerman Cc: Peter Zijlstra Cc: Don Zickus Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710022105570.2114@nanos --- arch/powerpc/kernel/watchdog.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 2673ec8bec00..f9b4c6352d24 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -359,21 +359,17 @@ void watchdog_nmi_stop(void) { int cpu; - cpus_read_lock(); for_each_cpu(cpu, &wd_cpus_enabled) stop_wd_on_cpu(cpu); - cpus_read_unlock(); } void watchdog_nmi_start(void) { int cpu; - cpus_read_lock(); watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) start_wd_on_cpu(cpu); - cpus_read_unlock(); } /* -- cgit From 34ddaa3e5c0096fef52485186c7eb6cf56ddc686 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Oct 2017 16:39:02 +0200 Subject: powerpc/watchdog: Make use of watchdog_nmi_probe() The rework of the core hotplug code triggers the WARN_ON in start_wd_cpu() on powerpc because it is called multiple times for the boot CPU. The first call is via: start_wd_on_cpu+0x80/0x2f0 watchdog_nmi_reconfigure+0x124/0x170 softlockup_reconfigure_threads+0x110/0x130 lockup_detector_init+0xbc/0xe0 kernel_init_freeable+0x18c/0x37c kernel_init+0x2c/0x160 ret_from_kernel_thread+0x5c/0xbc And then again via the CPU hotplug registration: start_wd_on_cpu+0x80/0x2f0 cpuhp_invoke_callback+0x194/0x620 cpuhp_thread_fun+0x7c/0x1b0 smpboot_thread_fn+0x290/0x2a0 kthread+0x168/0x1b0 ret_from_kernel_thread+0x5c/0xbc This can be avoided by setting up the cpu hotplug state with nocalls and move the initialization to the watchdog_nmi_probe() function. That initializes the hotplug callbacks without invoking the callback and the following core initialization function then configures the watchdog for the online CPUs (in this case CPU0) via softlockup_reconfigure_threads(). Reported-and-tested-by: Michael Ellerman Signed-off-by: Thomas Gleixner Acked-by: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/kernel/watchdog.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index f9b4c6352d24..c702a8981452 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -373,22 +373,21 @@ void watchdog_nmi_start(void) } /* - * This runs after lockup_detector_init() which sets up watchdog_cpumask. + * Invoked from core watchdog init. */ -static int __init powerpc_watchdog_init(void) +int __init watchdog_nmi_probe(void) { int err; - watchdog_calc_timeouts(); - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); - if (err < 0) + err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "powerpc/watchdog:online", + start_wd_on_cpu, stop_wd_on_cpu); + if (err < 0) { pr_warn("Watchdog could not be initialized"); - + return err; + } return 0; } -arch_initcall(powerpc_watchdog_init); static void handle_backtrace_ipi(struct pt_regs *regs) { -- cgit From 265e60a170d0a0ecfc2d20490134ed2c48dd45ab Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Thu, 17 Aug 2017 20:42:26 +1000 Subject: powerpc/64s: Use emergency stack for kernel TM Bad Thing program checks When using transactional memory (TM), the CPU can be in one of six states as far as TM is concerned, encoded in the Machine State Register (MSR). Certain state transitions are illegal and if attempted trigger a "TM Bad Thing" type program check exception. If we ever hit one of these exceptions it's treated as a bug, ie. we oops, and kill the process and/or panic, depending on configuration. One case where we can trigger a TM Bad Thing, is when returning to userspace after a system call or interrupt, using RFID. When this happens the CPU first restores the user register state, in particular r1 (the stack pointer) and then attempts to update the MSR. However the MSR update is not allowed and so we take the program check with the user register state, but the kernel MSR. This tricks the exception entry code into thinking we have a bad kernel stack pointer, because the MSR says we're coming from the kernel, but r1 is pointing to userspace. To avoid this we instead always switch to the emergency stack if we take a TM Bad Thing from the kernel. That way none of the user register values are used, other than for printing in the oops message. This is the fix for CVE-2017-1000255. Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Cyril Bur [mpe: Rewrite change log & comments, tweak asm slightly] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 48da0f5d2f7f..b82586c53560 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -734,7 +734,29 @@ EXC_REAL(program_check, 0x700, 0x100) EXC_VIRT(program_check, 0x4700, 0x100, 0x700) TRAMP_KVM(PACA_EXGEN, 0x700) EXC_COMMON_BEGIN(program_check_common) - EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + /* + * It's possible to receive a TM Bad Thing type program check with + * userspace register values (in particular r1), but with SRR1 reporting + * that we came from the kernel. Normally that would confuse the bad + * stack logic, and we would report a bad kernel stack pointer. Instead + * we switch to the emergency stack if we're taking a TM Bad Thing from + * the kernel. + */ + li r10,MSR_PR /* Build a mask of MSR_PR .. */ + oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */ + and r10,r10,r12 /* Mask SRR1 with that. */ + srdi r10,r10,8 /* Shift it so we can compare */ + cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */ + bne 1f /* If != go to normal path. */ + + /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */ + andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */ + /* 3 in EXCEPTION_PROLOG_COMMON */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + b 3f /* Jump into the macro !! */ +1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD -- cgit From 044215d145a7a8a60ffa8fdc859d110a795fa6ea Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Tue, 22 Aug 2017 17:20:09 -0400 Subject: powerpc/tm: Fix illegal TM state in signal handler Currently it's possible that on returning from the signal handler through the restore_tm_sigcontexts() code path (e.g. from a signal caught due to a `trap` instruction executed in the middle of an HTM block, or a deliberately constructed sigframe) an illegal TM state (like TS=10 TM=0, i.e. "T0") is set in SRR1 and when `rfid` sets implicitly the MSR register from SRR1 register on return to userspace it causes a TM Bad Thing exception. That illegal state can be set (a) by a malicious user that disables the TM bit by tweaking the bits in uc_mcontext before returning from the signal handler or (b) by a sufficient number of context switches occurring such that the load_tm counter overflows and TM is disabled whilst in the signal handler. This commit fixes the illegal TM state by ensuring that TM bit is always enabled before we return from restore_tm_sigcontexts(). A small comment correction is made as well. Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Gustavo Romero Signed-off-by: Breno Leitao Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/signal_64.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c83c115858c1..b2c002993d78 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -452,9 +452,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, if (MSR_TM_RESV(msr)) return -EINVAL; - /* pull in MSR TM from user context */ + /* pull in MSR TS bits from user context */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + /* + * Ensure that TM is enabled in regs->msr before we leave the signal + * handler. It could be the case that (a) user disabled the TM bit + * through the manipulation of the MSR bits in uc_mcontext or (b) the + * TM bit was disabled because a sufficient number of context switches + * happened whilst in the signal handler and load_tm overflowed, + * disabling the TM bit. In either case we can end up with an illegal + * TM state leading to a TM Bad Thing when we return to userspace. + */ + regs->msr |= MSR_TM; + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); -- cgit From e36a82ee4c514a2f4f8fa30c780ad059282f5d64 Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Wed, 20 Sep 2017 15:49:51 +0530 Subject: powerpc/livepatch: Fix livepatch stack access While running stress test with livepatch module loaded, kernel bug was triggered. cpu 0x5: Vector: 400 (Instruction Access) at [c0000000eb9d3b60] 5:mon> t [c0000000eb9d3de0] c0000000eb9d3e30 (unreliable) [c0000000eb9d3e30] c000000000008ab4 hardware_interrupt_common+0x114/0x120 --- Exception: 501 (Hardware Interrupt) at c000000000053040 livepatch_handler+0x4c/0x74 [c0000000eb9d4120] 0000000057ac6e9d (unreliable) [d0000000089d9f78] 2e0965747962382e SP (965747962342e09) is in userspace When an interrupt occurs during the livepatch_handler execution, it's possible for the livepatch_stack and/or thread_info to be corrupted. eg: Task A Interrupt Handler ========= ================= livepatch_handler: mr r0, r1 ld r1, TI_livepatch_sp(r12) hardware_interrupt_common: do_IRQ+0x8: mflr r0 <- saved stack pointer is overwritten bl _mcount ... std r27,-40(r1) <- overwrite of thread_info() lis r2, STACK_END_MAGIC@h ori r2, r2, STACK_END_MAGIC@l ld r12, -8(r1) Fix the corruption by using r11 register for livepatch stack manipulation, instead of shuffling task stack and livepatch stack into r1 register. Using r11 register also avoids disabling/enabling irq's while setting up the livepatch stack. Signed-off-by: Kamalesh Babulal Reviewed-by: Naveen N. Rao Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 45 +++++++++----------------- 1 file changed, 15 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index c98e90b4ea7b..b4e2b7165f79 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -181,34 +181,25 @@ _GLOBAL(ftrace_stub) * - we have no stack frame and can not allocate one * - LR points back to the original caller (in A) * - CTR holds the new NIP in C - * - r0 & r12 are free - * - * r0 can't be used as the base register for a DS-form load or store, so - * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. + * - r0, r11 & r12 are free */ livepatch_handler: CURRENT_THREAD_INFO(r12, r1) - /* Save stack pointer into r0 */ - mr r0, r1 - /* Allocate 3 x 8 bytes */ - ld r1, TI_livepatch_sp(r12) - addi r1, r1, 24 - std r1, TI_livepatch_sp(r12) + ld r11, TI_livepatch_sp(r12) + addi r11, r11, 24 + std r11, TI_livepatch_sp(r12) /* Save toc & real LR on livepatch stack */ - std r2, -24(r1) + std r2, -24(r11) mflr r12 - std r12, -16(r1) + std r12, -16(r11) /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r1) - - /* Restore real stack pointer */ - mr r1, r0 + std r12, -8(r11) /* Put ctr in r12 for global entry and branch there */ mfctr r12 @@ -216,36 +207,30 @@ livepatch_handler: /* * Now we are returning from the patched function to the original - * caller A. We are free to use r0 and r12, and we can use r2 until we + * caller A. We are free to use r11, r12 and we can use r2 until we * restore it. */ CURRENT_THREAD_INFO(r12, r1) - /* Save stack pointer into r0 */ - mr r0, r1 - - ld r1, TI_livepatch_sp(r12) + ld r11, TI_livepatch_sp(r12) /* Check stack marker hasn't been trashed */ lis r2, STACK_END_MAGIC@h ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r1) + ld r12, -8(r11) 1: tdne r12, r2 EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 /* Restore LR & toc from livepatch stack */ - ld r12, -16(r1) + ld r12, -16(r11) mtlr r12 - ld r2, -24(r1) + ld r2, -24(r11) /* Pop livepatch stack frame */ - CURRENT_THREAD_INFO(r12, r0) - subi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Restore real stack pointer */ - mr r1, r0 + CURRENT_THREAD_INFO(r12, r1) + subi r11, r11, 24 + std r11, TI_livepatch_sp(r12) /* Return to original caller of live patched function */ blr -- cgit