From 5490125d77a43016b26f629d4b485e2c62172551 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:36:59 +0200
Subject: watchdog/core: Remove broken suspend/resume interfaces

This interface has several issues:

 - It's causing recursive locking of the hotplug lock.

 - It's complete overkill to teardown all threads and then recreate them

The same can be achieved with the simple hardlockup_detector_perf_stop /
restart() interfaces. The abuse from the busy looping poweroff() loop of
PARISC has been solved as well.

Remove the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194146.487537732@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/watchdog.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 2f6eadd9408d..5ded171f02d6 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu)
 	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
 		return 0;
 
-	if (watchdog_suspended)
-		return 0;
-
 	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
 		return 0;
 
-- 
cgit 


From 6592ad2fcc8f15b4f99b36c1db7d9f65510c203b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:16 +0200
Subject: watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage

Both the perf reconfiguration and the powerpc watchdog_nmi_reconfigure()
need to be done in two steps.

     1) Stop all NMIs
     2) Read the new parameters and start NMIs

Right now watchdog_nmi_reconfigure() is a combination of both. To allow a
clean reconfiguration add a 'run' argument and split the functionality in
powerpc.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20170912194147.862865570@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/watchdog.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 5ded171f02d6..291af79a9826 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -355,17 +355,18 @@ static void watchdog_calc_timeouts(void)
 	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
 }
 
-void watchdog_nmi_reconfigure(void)
+void watchdog_nmi_reconfigure(bool run)
 {
 	int cpu;
 
-	watchdog_calc_timeouts();
-
-	for_each_cpu(cpu, &wd_cpus_enabled)
-		stop_wd_on_cpu(cpu);
-
-	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
-		start_wd_on_cpu(cpu);
+	if (!run) {
+		for_each_cpu(cpu, &wd_cpus_enabled)
+			stop_wd_on_cpu(cpu);
+	} else {
+		watchdog_calc_timeouts();
+		for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+			start_wd_on_cpu(cpu);
+	}
 }
 
 /*
-- 
cgit 


From ab5fe3ff38ff9653490910cc71dbbedc95a86e41 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:23 +0200
Subject: watchdog/hardlockup: Clean up hotplug locking mess

All watchdog thread related functions are delegated to the smpboot thread
infrastructure, which handles serialization against CPU hotplug correctly.

The sysctl interface is completely decoupled from anything which requires
CPU hotplug protection.

No need to protect the sysctl writes against cpu hotplug anymore. Remove it
and add the now required protection to the powerpc arch_nmi_watchdog
implementation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20170912194148.418497420@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/watchdog.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 291af79a9826..dfb067764480 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -359,6 +359,7 @@ void watchdog_nmi_reconfigure(bool run)
 {
 	int cpu;
 
+	cpus_read_lock();
 	if (!run) {
 		for_each_cpu(cpu, &wd_cpus_enabled)
 			stop_wd_on_cpu(cpu);
@@ -367,6 +368,7 @@ void watchdog_nmi_reconfigure(bool run)
 		for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
 			start_wd_on_cpu(cpu);
 	}
+	cpus_read_unlock();
 }
 
 /*
-- 
cgit 


From d8bd9f3f0925d22726de159531bfe3774b5cacc6 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 22 Sep 2017 13:32:21 +1000
Subject: powerpc: Handle MCE on POWER9 with only DSISR bit 30 set

On POWER9 DD2.1 and below, it's possible for a paste instruction to
cause a Machine Check Exception (MCE) where only DSISR bit 30 (IBM 33)
is set. This will result in the MCE handler seeing an unknown event,
which triggers linux to crash.

We change this by detecting unknown events caused by load/stores in
the MCE handler and marking them as handled so that we no longer
crash.

An MCE that occurs like this is spurious, so we don't need to do
anything in terms of servicing it. If there is something that needs to
be serviced, the CPU will raise the MCE again with the correct DSISR
so that it can be serviced properly.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com
Acked-by: Balbir Singh <bsingharora@gmail.com>
[mpe: Expand comment with details from change log, use normal bit #s]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/mce_power.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index b76ca198e09c..f523125b9d34 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -624,5 +624,18 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
 
 long __machine_check_early_realmode_p9(struct pt_regs *regs)
 {
+	/*
+	 * On POWER9 DD2.1 and below, it's possible to get a machine check
+	 * caused by a paste instruction where only DSISR bit 30 is set. This
+	 * will result in the MCE handler seeing an unknown event and the kernel
+	 * crashing. An MCE that occurs like this is spurious, so we don't need
+	 * to do anything in terms of servicing it. If there is something that
+	 * needs to be serviced, the CPU will raise the MCE again with the
+	 * correct DSISR so that it can be serviced properly. So detect this
+	 * case and mark it as handled.
+	 */
+	if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x40000000)
+		return 1;
+
 	return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
 }
-- 
cgit 


From bca73f595a566f0262967535bb5b2ea9c4271d9a Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 28 Sep 2017 22:37:35 -0500
Subject: powerpc: Fix workaround for spurious MCE on POWER9

In the recent commit d8bd9f3f0925 ("powerpc: Handle MCE on POWER9 with
only DSISR bit 30 set") I screwed up the bit number. It should be bit
25 (IBM bit 38).

Fixes: d8bd9f3f0925 ("powerpc: Handle MCE on POWER9 with only DSISR bit 30 set")
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/mce_power.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index f523125b9d34..72f153c6f3fa 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -626,7 +626,7 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
 {
 	/*
 	 * On POWER9 DD2.1 and below, it's possible to get a machine check
-	 * caused by a paste instruction where only DSISR bit 30 is set. This
+	 * caused by a paste instruction where only DSISR bit 25 is set. This
 	 * will result in the MCE handler seeing an unknown event and the kernel
 	 * crashing. An MCE that occurs like this is spurious, so we don't need
 	 * to do anything in terms of servicing it. If there is something that
@@ -634,7 +634,7 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
 	 * correct DSISR so that it can be serviced properly. So detect this
 	 * case and mark it as handled.
 	 */
-	if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x40000000)
+	if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
 		return 1;
 
 	return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
-- 
cgit 


From 3b7af5c0fd9631762d1c4d7b4cee76f571dd3c2c Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 27 Sep 2017 12:55:51 +0800
Subject: powerpc: Fix action argument for cpufeatures-based TLB flush

Commit 41d0c2ecde19 ("powerpc/powernv: Fix local TLB flush for boot
and MCE on POWER9") introduced calls to __flush_tlb_power[89] from the
cpufeatures code, specifying the number of sets to flush.

However, these functions take an action argument, not a number of
sets. This means we hit the BUG() in __flush_tlb_{206,300} when using
cpufeatures-style configuration.

This change passes TLB_INVAL_SCOPE_GLOBAL instead.

Fixes: 41d0c2ecde19 ("powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9")
Cc: stable@vger.kernel.org # v4.13+
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 1df770e8cbe0..7275fed271af 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -102,10 +102,10 @@ static void cpufeatures_flush_tlb(void)
 	case PVR_POWER8:
 	case PVR_POWER8E:
 	case PVR_POWER8NVL:
-		__flush_tlb_power8(POWER8_TLB_SETS);
+		__flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
 		break;
 	case PVR_POWER9:
-		__flush_tlb_power9(POWER9_TLB_SETS_HASH);
+		__flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
 		break;
 	default:
 		pr_err("unknown CPU version for boot TLB flush\n");
-- 
cgit 


From 070e004912fed099263408bf2ff1bbc6926abe2e Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Sun, 1 Oct 2017 16:33:03 +0200
Subject: powerpc/4xx: Fix compile error with 64K pages on 40x, 44x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mmu context on the 40x, 44x does not define pte_frag entry. This
causes gcc abort the compilation due to:

  setup-common.c: In function ‘setup_arch’:
  setup-common.c:908: error: ‘mm_context_t’ has no ‘pte_frag’

This patch fixes the issue by removing the pte_frag initialization in
setup-common.c.

This is possible, because the compiler will do the initialization,
since the mm_context is a sub struct of init_mm. init_mm is declared
in mm_types.h as external linkage.

According to C99 6.2.4.3:
  An object whose identifier is declared with external linkage
  [...] has static storage duration.

C99 defines in 6.7.8.10 that:
  If an object that has static storage duration is not
  initialized explicitly, then:
  - if it has pointer type, it is initialized to a null pointer

Fixes: b1923caa6e64 ("powerpc: Merge 32-bit and 64-bit setup_arch()")
Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup-common.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 0ac741fae90e..2e3bc16d02b2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -904,9 +904,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 
-#ifdef CONFIG_PPC_64K_PAGES
-	init_mm.context.pte_frag = NULL;
-#endif
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(&init_mm);
 #endif
-- 
cgit 


From 6b9dc4806b28214a4a260517e59439e0ac12a15e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 2 Oct 2017 12:34:50 +0200
Subject: watchdog/core, powerpc: Replace watchdog_nmi_reconfigure()

The recent cleanup of the watchdog code split watchdog_nmi_reconfigure()
into two stages. One to stop the NMI and one to restart it after
reconfiguration. That was done by adding a boolean 'run' argument to the
code, which is functionally correct but not necessarily a piece of art.

Replace it by two explicit functions: watchdog_nmi_stop() and
watchdog_nmi_start().

Fixes: 6592ad2fcc8f ("watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage")
Requested-by: Linus 'Nursing his pet-peeve' Torvalds <torvalds@linuxfoundation.org>
Signed-off-by: Thomas 'Mopping up garbage' Gleixner <tglx@linutronix.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710021957480.2114@nanos
---
 arch/powerpc/kernel/watchdog.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index dfb067764480..2673ec8bec00 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -355,19 +355,24 @@ static void watchdog_calc_timeouts(void)
 	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
 }
 
-void watchdog_nmi_reconfigure(bool run)
+void watchdog_nmi_stop(void)
 {
 	int cpu;
 
 	cpus_read_lock();
-	if (!run) {
-		for_each_cpu(cpu, &wd_cpus_enabled)
-			stop_wd_on_cpu(cpu);
-	} else {
-		watchdog_calc_timeouts();
-		for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
-			start_wd_on_cpu(cpu);
-	}
+	for_each_cpu(cpu, &wd_cpus_enabled)
+		stop_wd_on_cpu(cpu);
+	cpus_read_unlock();
+}
+
+void watchdog_nmi_start(void)
+{
+	int cpu;
+
+	cpus_read_lock();
+	watchdog_calc_timeouts();
+	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+		start_wd_on_cpu(cpu);
 	cpus_read_unlock();
 }
 
-- 
cgit 


From e31d6883f21c1cdfe5bc64e28411f8a92b783fde Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 3 Oct 2017 16:37:53 +0200
Subject: watchdog/core, powerpc: Lock cpus across reconfiguration

Instead of dropping the cpu hotplug lock after stopping NMI watchdog and
threads and reaquiring for restart, the code and the protection rules
become more obvious when holding cpu hotplug lock across the full
reconfiguration.

Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710022105570.2114@nanos
---
 arch/powerpc/kernel/watchdog.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 2673ec8bec00..f9b4c6352d24 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -359,21 +359,17 @@ void watchdog_nmi_stop(void)
 {
 	int cpu;
 
-	cpus_read_lock();
 	for_each_cpu(cpu, &wd_cpus_enabled)
 		stop_wd_on_cpu(cpu);
-	cpus_read_unlock();
 }
 
 void watchdog_nmi_start(void)
 {
 	int cpu;
 
-	cpus_read_lock();
 	watchdog_calc_timeouts();
 	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
 		start_wd_on_cpu(cpu);
-	cpus_read_unlock();
 }
 
 /*
-- 
cgit 


From 34ddaa3e5c0096fef52485186c7eb6cf56ddc686 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 3 Oct 2017 16:39:02 +0200
Subject: powerpc/watchdog: Make use of watchdog_nmi_probe()

The rework of the core hotplug code triggers the WARN_ON in start_wd_cpu()
on powerpc because it is called multiple times for the boot CPU.

The first call is via:

  start_wd_on_cpu+0x80/0x2f0
  watchdog_nmi_reconfigure+0x124/0x170
  softlockup_reconfigure_threads+0x110/0x130
  lockup_detector_init+0xbc/0xe0
  kernel_init_freeable+0x18c/0x37c
  kernel_init+0x2c/0x160
  ret_from_kernel_thread+0x5c/0xbc

And then again via the CPU hotplug registration:

  start_wd_on_cpu+0x80/0x2f0
  cpuhp_invoke_callback+0x194/0x620
  cpuhp_thread_fun+0x7c/0x1b0
  smpboot_thread_fn+0x290/0x2a0
  kthread+0x168/0x1b0
  ret_from_kernel_thread+0x5c/0xbc

This can be avoided by setting up the cpu hotplug state with nocalls and
move the initialization to the watchdog_nmi_probe() function. That
initializes the hotplug callbacks without invoking the callback and the
following core initialization function then configures the watchdog for the
online CPUs (in this case CPU0) via softlockup_reconfigure_threads().

Reported-and-tested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/kernel/watchdog.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index f9b4c6352d24..c702a8981452 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -373,22 +373,21 @@ void watchdog_nmi_start(void)
 }
 
 /*
- * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ * Invoked from core watchdog init.
  */
-static int __init powerpc_watchdog_init(void)
+int __init watchdog_nmi_probe(void)
 {
 	int err;
 
-	watchdog_calc_timeouts();
-
-	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
-				start_wd_on_cpu, stop_wd_on_cpu);
-	if (err < 0)
+	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					"powerpc/watchdog:online",
+					start_wd_on_cpu, stop_wd_on_cpu);
+	if (err < 0) {
 		pr_warn("Watchdog could not be initialized");
-
+		return err;
+	}
 	return 0;
 }
-arch_initcall(powerpc_watchdog_init);
 
 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
-- 
cgit 


From 265e60a170d0a0ecfc2d20490134ed2c48dd45ab Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@gmail.com>
Date: Thu, 17 Aug 2017 20:42:26 +1000
Subject: powerpc/64s: Use emergency stack for kernel TM Bad Thing program
 checks

When using transactional memory (TM), the CPU can be in one of six
states as far as TM is concerned, encoded in the Machine State
Register (MSR). Certain state transitions are illegal and if attempted
trigger a "TM Bad Thing" type program check exception.

If we ever hit one of these exceptions it's treated as a bug, ie. we
oops, and kill the process and/or panic, depending on configuration.

One case where we can trigger a TM Bad Thing, is when returning to
userspace after a system call or interrupt, using RFID. When this
happens the CPU first restores the user register state, in particular
r1 (the stack pointer) and then attempts to update the MSR. However
the MSR update is not allowed and so we take the program check with
the user register state, but the kernel MSR.

This tricks the exception entry code into thinking we have a bad
kernel stack pointer, because the MSR says we're coming from the
kernel, but r1 is pointing to userspace.

To avoid this we instead always switch to the emergency stack if we
take a TM Bad Thing from the kernel. That way none of the user
register values are used, other than for printing in the oops message.

This is the fix for CVE-2017-1000255.

Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
[mpe: Rewrite change log & comments, tweak asm slightly]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 48da0f5d2f7f..b82586c53560 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -734,7 +734,29 @@ EXC_REAL(program_check, 0x700, 0x100)
 EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
 TRAMP_KVM(PACA_EXGEN, 0x700)
 EXC_COMMON_BEGIN(program_check_common)
-	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+	/*
+	 * It's possible to receive a TM Bad Thing type program check with
+	 * userspace register values (in particular r1), but with SRR1 reporting
+	 * that we came from the kernel. Normally that would confuse the bad
+	 * stack logic, and we would report a bad kernel stack pointer. Instead
+	 * we switch to the emergency stack if we're taking a TM Bad Thing from
+	 * the kernel.
+	 */
+	li	r10,MSR_PR		/* Build a mask of MSR_PR ..	*/
+	oris	r10,r10,0x200000@h	/* .. and SRR1_PROGTM		*/
+	and	r10,r10,r12		/* Mask SRR1 with that.		*/
+	srdi	r10,r10,8		/* Shift it so we can compare	*/
+	cmpldi	r10,(0x200000 >> 8)	/* .. with an immediate.	*/
+	bne 1f				/* If != go to normal path.	*/
+
+	/* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack	*/
+	andi.	r10,r12,MSR_PR;		/* Set CR0 correctly for label	*/
+					/* 3 in EXCEPTION_PROLOG_COMMON	*/
+	mr	r10,r1			/* Save r1			*/
+	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack		*/
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
+	b 3f				/* Jump into the macro !!	*/
+1:	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
 	bl	save_nvgprs
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-- 
cgit 


From 044215d145a7a8a60ffa8fdc859d110a795fa6ea Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.vnet.ibm.com>
Date: Tue, 22 Aug 2017 17:20:09 -0400
Subject: powerpc/tm: Fix illegal TM state in signal handler

Currently it's possible that on returning from the signal handler
through the restore_tm_sigcontexts() code path (e.g. from a signal
caught due to a `trap` instruction executed in the middle of an HTM
block, or a deliberately constructed sigframe) an illegal TM state
(like TS=10 TM=0, i.e. "T0") is set in SRR1 and when `rfid` sets
implicitly the MSR register from SRR1 register on return to userspace
it causes a TM Bad Thing exception.

That illegal state can be set (a) by a malicious user that disables
the TM bit by tweaking the bits in uc_mcontext before returning from
the signal handler or (b) by a sufficient number of context switches
occurring such that the load_tm counter overflows and TM is disabled
whilst in the signal handler.

This commit fixes the illegal TM state by ensuring that TM bit is
always enabled before we return from restore_tm_sigcontexts(). A small
comment correction is made as well.

Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/signal_64.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index c83c115858c1..b2c002993d78 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -452,9 +452,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
 	if (MSR_TM_RESV(msr))
 		return -EINVAL;
 
-	/* pull in MSR TM from user context */
+	/* pull in MSR TS bits from user context */
 	regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
 
+	/*
+	 * Ensure that TM is enabled in regs->msr before we leave the signal
+	 * handler. It could be the case that (a) user disabled the TM bit
+	 * through the manipulation of the MSR bits in uc_mcontext or (b) the
+	 * TM bit was disabled because a sufficient number of context switches
+	 * happened whilst in the signal handler and load_tm overflowed,
+	 * disabling the TM bit. In either case we can end up with an illegal
+	 * TM state leading to a TM Bad Thing when we return to userspace.
+	 */
+	regs->msr |= MSR_TM;
+
 	/* pull in MSR LE from user context */
 	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
-- 
cgit 


From e36a82ee4c514a2f4f8fa30c780ad059282f5d64 Mon Sep 17 00:00:00 2001
From: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Date: Wed, 20 Sep 2017 15:49:51 +0530
Subject: powerpc/livepatch: Fix livepatch stack access

While running stress test with livepatch module loaded, kernel bug was
triggered.

  cpu 0x5: Vector: 400 (Instruction Access) at [c0000000eb9d3b60]
  5:mon> t
  [c0000000eb9d3de0] c0000000eb9d3e30 (unreliable)
  [c0000000eb9d3e30] c000000000008ab4 hardware_interrupt_common+0x114/0x120
   --- Exception: 501 (Hardware Interrupt) at c000000000053040 livepatch_handler+0x4c/0x74
  [c0000000eb9d4120] 0000000057ac6e9d (unreliable)
  [d0000000089d9f78] 2e0965747962382e
  SP (965747962342e09) is in userspace

When an interrupt occurs during the livepatch_handler execution, it's
possible for the livepatch_stack and/or thread_info to be corrupted.
eg:

  Task A                        Interrupt Handler
  =========                     =================
  livepatch_handler:
  mr r0, r1
  ld r1, TI_livepatch_sp(r12)
                                hardware_interrupt_common:
                                  do_IRQ+0x8:
                                    mflr    r0          <- saved stack pointer is overwritten
                                    bl      _mcount
                                    ...
                                    std     r27,-40(r1) <- overwrite of thread_info()

  lis r2, STACK_END_MAGIC@h
  ori r2, r2, STACK_END_MAGIC@l
  ld  r12, -8(r1)

Fix the corruption by using r11 register for livepatch stack
manipulation, instead of shuffling task stack and livepatch stack into
r1 register. Using r11 register also avoids disabling/enabling irq's
while setting up the livepatch stack.

Signed-off-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 45 +++++++++-----------------
 1 file changed, 15 insertions(+), 30 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index c98e90b4ea7b..b4e2b7165f79 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -181,34 +181,25 @@ _GLOBAL(ftrace_stub)
 	 *  - we have no stack frame and can not allocate one
 	 *  - LR points back to the original caller (in A)
 	 *  - CTR holds the new NIP in C
-	 *  - r0 & r12 are free
-	 *
-	 * r0 can't be used as the base register for a DS-form load or store, so
-	 * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
+	 *  - r0, r11 & r12 are free
 	 */
 livepatch_handler:
 	CURRENT_THREAD_INFO(r12, r1)
 
-	/* Save stack pointer into r0 */
-	mr	r0, r1
-
 	/* Allocate 3 x 8 bytes */
-	ld	r1, TI_livepatch_sp(r12)
-	addi	r1, r1, 24
-	std	r1, TI_livepatch_sp(r12)
+	ld	r11, TI_livepatch_sp(r12)
+	addi	r11, r11, 24
+	std	r11, TI_livepatch_sp(r12)
 
 	/* Save toc & real LR on livepatch stack */
-	std	r2,  -24(r1)
+	std	r2,  -24(r11)
 	mflr	r12
-	std	r12, -16(r1)
+	std	r12, -16(r11)
 
 	/* Store stack end marker */
 	lis     r12, STACK_END_MAGIC@h
 	ori     r12, r12, STACK_END_MAGIC@l
-	std	r12, -8(r1)
-
-	/* Restore real stack pointer */
-	mr	r1, r0
+	std	r12, -8(r11)
 
 	/* Put ctr in r12 for global entry and branch there */
 	mfctr	r12
@@ -216,36 +207,30 @@ livepatch_handler:
 
 	/*
 	 * Now we are returning from the patched function to the original
-	 * caller A. We are free to use r0 and r12, and we can use r2 until we
+	 * caller A. We are free to use r11, r12 and we can use r2 until we
 	 * restore it.
 	 */
 
 	CURRENT_THREAD_INFO(r12, r1)
 
-	/* Save stack pointer into r0 */
-	mr	r0, r1
-
-	ld	r1, TI_livepatch_sp(r12)
+	ld	r11, TI_livepatch_sp(r12)
 
 	/* Check stack marker hasn't been trashed */
 	lis     r2,  STACK_END_MAGIC@h
 	ori     r2,  r2, STACK_END_MAGIC@l
-	ld	r12, -8(r1)
+	ld	r12, -8(r11)
 1:	tdne	r12, r2
 	EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
 
 	/* Restore LR & toc from livepatch stack */
-	ld	r12, -16(r1)
+	ld	r12, -16(r11)
 	mtlr	r12
-	ld	r2,  -24(r1)
+	ld	r2,  -24(r11)
 
 	/* Pop livepatch stack frame */
-	CURRENT_THREAD_INFO(r12, r0)
-	subi	r1, r1, 24
-	std	r1, TI_livepatch_sp(r12)
-
-	/* Restore real stack pointer */
-	mr	r1, r0
+	CURRENT_THREAD_INFO(r12, r1)
+	subi	r11, r11, 24
+	std	r11, TI_livepatch_sp(r12)
 
 	/* Return to original caller of live patched function */
 	blr
-- 
cgit