summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/83xx/usb.c4
-rw-r--r--arch/powerpc/platforms/8xx/pic.c3
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype45
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c4
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c19
-rw-r--r--arch/powerpc/platforms/powermac/Makefile6
-rw-r--r--arch/powerpc/platforms/powernv/idle.c902
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c23
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c35
-rw-r--r--arch/powerpc/platforms/powernv/pci.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c5
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c17
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c13
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c3
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c3
-rw-r--r--arch/powerpc/platforms/pseries/ras.c135
19 files changed, 970 insertions, 262 deletions
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
index 5c31d8292d3b..e7c2c3fb011a 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void)
int ret = 0;
np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np || !of_device_is_available(np))
+ if (!np || !of_device_is_available(np)) {
+ of_node_put(np);
return -ENODEV;
+ }
prop = of_get_property(np, "phy_type", NULL);
if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d43ef3..e9617d35fd1f 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,10 +153,9 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
- goto out;
}
- return 0;
+ ret = 0;
out:
of_node_put(np);
return ret;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 50cd09b4e05d..d0e172d47574 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -25,6 +25,8 @@ config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
config PPC_85xx
bool "Freescale 85xx"
@@ -34,6 +36,9 @@ config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
select SYS_SUPPORTS_HUGETLBFS
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
+ select PPC_MM_SLICES if HUGETLB_PAGE
config 40x
bool "AMCC 40x"
@@ -75,6 +80,7 @@ config PPC_BOOK3S_64
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
+ select PPC_MM_SLICES
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -326,6 +332,8 @@ config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
@@ -345,6 +353,37 @@ config PPC_RADIX_MMU_DEFAULT
If you're unsure, say Y.
+config PPC_HAVE_KUEP
+ bool
+
+config PPC_KUEP
+ bool "Kernel Userspace Execution Prevention"
+ depends on PPC_HAVE_KUEP
+ default y
+ help
+ Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+ If you're unsure, say Y.
+
+config PPC_HAVE_KUAP
+ bool
+
+config PPC_KUAP
+ bool "Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP
+ default y
+ help
+ Enable support for Kernel Userspace Access Protection (KUAP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+ bool "Extra debugging for Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32)
+ help
+ Add extra debugging for Kernel Userspace Access Protection (KUAP)
+ If you're unsure, say N.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -354,14 +393,16 @@ config PPC_MMU_NOHASH
def_bool y
depends on !PPC_BOOK3S
+config PPC_MMU_NOHASH_32
+ def_bool y
+ depends on PPC_MMU_NOHASH && PPC32
+
config PPC_BOOK3E_MMU
def_bool y
depends on FSL_BOOKE || PPC_BOOK3E
config PPC_MM_SLICES
bool
- default y if PPC_BOOK3S_64
- default y if PPC_8xx && HUGETLB_PAGE
config PPC_HAVE_PMU_SUPPORT
bool
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 7f12c7b78c0f..6646f152d57b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
* faults need to be deferred to process context.
*/
if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
- (REGION_ID(ea) != USER_REGION_ID)) {
+ (get_region_id(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
ret = hash_page(ea,
@@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
unsigned long ea = (unsigned long)addr;
u64 llp;
- if (REGION_ID(ea) == KERNEL_REGION_ID)
+ if (get_region_id(ea) == LINEAR_MAP_REGION_ID)
llp = mmu_psize_defs[mmu_linear_psize].sllp;
else
llp = mmu_psize_defs[mmu_virtual_psize].sllp;
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 0409714e8070..829bf3697dc9 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -44,7 +44,8 @@
#define HOLLY_PCI_CFG_PHYS 0x7c000000
-int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn)
+static int holly_exclude_device(struct pci_controller *hose, u_char bus,
+ u_char devfn)
{
if (bus == 0 && PCI_SLOT(devfn) == 0)
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void)
tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
}
-void holly_show_cpuinfo(struct seq_file *m)
+static void holly_show_cpuinfo(struct seq_file *m)
{
seq_printf(m, "vendor\t\t: IBM\n");
seq_printf(m, "machine\t\t: PPC750 GX/CL\n");
}
-void __noreturn holly_restart(char *cmd)
+static void __noreturn holly_restart(char *cmd)
{
__be32 __iomem *ocn_bar1 = NULL;
unsigned long bar;
@@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd)
for (;;) ;
}
-void holly_power_off(void)
-{
- local_irq_disable();
- /* No way to shut power off with software */
- for (;;) ;
-}
-
-void holly_halt(void)
-{
- holly_power_off();
-}
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 20ebf35d7913..f4247ade71ca 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -2,6 +2,12 @@
CFLAGS_bootx_init.o += -fPIC
CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector)
+KASAN_SANITIZE_bootx_init.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING
+endif
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e52f9b06dd9c..c9133f7908ca 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/cpu.h>
+#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
@@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask;
static bool default_stop_found;
/*
- * First deep stop state. Used to figure out when to save/restore
- * hypervisor context.
+ * First stop state levels when SPR and TB loss can occur.
*/
-u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
/*
* psscr value and mask of the deepest stop idle state.
@@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
+static unsigned long power7_offline_type;
+
static int pnv_save_sprs_for_deep_states(void)
{
int cpu;
@@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void)
* all cpus at boot. Get these reg values of current cpu and use the
* same across all cpus.
*/
- uint64_t lpcr_val = mfspr(SPRN_LPCR);
- uint64_t hid0_val = mfspr(SPRN_HID0);
- uint64_t hid1_val = mfspr(SPRN_HID1);
- uint64_t hid4_val = mfspr(SPRN_HID4);
- uint64_t hid5_val = mfspr(SPRN_HID5);
- uint64_t hmeer_val = mfspr(SPRN_HMEER);
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
uint64_t msr_val = MSR_IDLE;
uint64_t psscr_val = pnv_deepest_stop_psscr_val;
@@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void)
return 0;
}
-static void pnv_alloc_idle_core_states(void)
-{
- int i, j;
- int nr_cores = cpu_nr_cores();
- u32 *core_idle_state;
-
- /*
- * core_idle_state - The lower 8 bits track the idle state of
- * each thread of the core.
- *
- * The most significant bit is the lock bit.
- *
- * Initially all the bits corresponding to threads_per_core
- * are set. They are cleared when the thread enters deep idle
- * state like sleep and winkle/stop.
- *
- * Initially the lock bit is cleared. The lock bit has 2
- * purposes:
- * a. While the first thread in the core waking up from
- * idle is restoring core state, it prevents other
- * threads in the core from switching to process
- * context.
- * b. While the last thread in the core is saving the
- * core state, it prevents a different thread from
- * waking up.
- */
- for (i = 0; i < nr_cores; i++) {
- int first_cpu = i * threads_per_core;
- int node = cpu_to_node(first_cpu);
- size_t paca_ptr_array_size;
-
- core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
- *core_idle_state = (1 << threads_per_core) - 1;
- paca_ptr_array_size = (threads_per_core *
- sizeof(struct paca_struct *));
-
- for (j = 0; j < threads_per_core; j++) {
- int cpu = first_cpu + j;
-
- paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
- paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
- paca_ptrs[cpu]->thread_mask = 1 << j;
- }
- }
-
- update_subcore_sibling_mask();
-
- if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
- int rc = pnv_save_sprs_for_deep_states();
-
- if (likely(!rc))
- return;
-
- /*
- * The stop-api is unable to restore hypervisor
- * resources on wakeup from platform idle states which
- * lose full context. So disable such states.
- */
- supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
- pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
- pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
-
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
- (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
- /*
- * Use the default stop state for CPU-Hotplug
- * if available.
- */
- if (default_stop_found) {
- pnv_deepest_stop_psscr_val =
- pnv_default_stop_val;
- pnv_deepest_stop_psscr_mask =
- pnv_default_stop_mask;
- pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
- pnv_deepest_stop_psscr_val);
- } else { /* Fallback to snooze loop for CPU-Hotplug */
- deepest_stop_found = false;
- pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
- }
- }
- }
-}
-
u32 pnv_get_supported_cpuidle_states(void)
{
return supported_cpuidle_states;
@@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info)
*err = 1;
}
+static bool power7_fastsleep_workaround_entry = true;
+static bool power7_fastsleep_workaround_exit = true;
+
/*
* Used to store fastsleep workaround state
* 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
@@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
* fastsleep_workaround_applyonce = 1 implies
* fastsleep workaround needs to be left in 'applied' state on all
* the cores. Do this by-
- * 1. Patching out the call to 'undo' workaround in fastsleep exit path
- * 2. Sending ipi to all the cores which have at least one online thread
- * 3. Patching out the call to 'apply' workaround in fastsleep entry
- * path
+ * 1. Disable the 'undo' workaround in fastsleep exit path
+ * 2. Sendi IPIs to all the cores which have at least one online thread
+ * 3. Disable the 'apply' workaround in fastsleep entry path
+ *
* There is no need to send ipi to cores which have all threads
* offlined, as last thread of the core entering fastsleep or deeper
* state would have applied workaround.
*/
- err = patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- if (err) {
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
- goto fail;
- }
+ power7_fastsleep_workaround_exit = false;
get_online_cpus();
primary_thread_mask = cpu_online_cores_map();
@@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
goto fail;
}
- err = patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- if (err) {
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
- goto fail;
- }
+ power7_fastsleep_workaround_entry = false;
fastsleep_workaround_applyonce = 1;
@@ -315,27 +226,346 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
show_fastsleep_workaround_applyonce,
store_fastsleep_workaround_applyonce);
-static unsigned long __power7_idle_type(unsigned long type)
+static inline void atomic_start_thread_idle(void)
{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ int thread_nr = cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ clear_bit(thread_nr, state);
+}
+
+static inline void atomic_stop_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ int thread_nr = cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ set_bit(thread_nr, state);
+}
+
+static inline void atomic_lock_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
+ barrier();
+}
+
+static inline void atomic_unlock_and_stop_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ u64 s = READ_ONCE(*state);
+ u64 new, tmp;
+
+ BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
+ BUG_ON(s & thread);
+
+again:
+ new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
+ tmp = cmpxchg(state, s, new);
+ if (unlikely(tmp != s)) {
+ s = tmp;
+ goto again;
+ }
+}
+
+static inline void atomic_unlock_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
+ clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
+}
+
+/* P7 and P8 */
+struct p7_sprs {
+ /* per core */
+ u64 tscr;
+ u64 worc;
+
+ /* per subcore */
+ u64 sdr1;
+ u64 rpr;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+static unsigned long power7_idle_insn(unsigned long type)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
+ bool full_winkle;
+ struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
+ bool sprs_saved = false;
+ int rc;
- if (!prep_irq_for_idle_irqsoff())
- return 0;
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+
+ BUG_ON(!(*state & thread));
+ *state &= ~thread;
+
+ if (power7_fastsleep_workaround_entry) {
+ if ((*state & core_thread_mask) == 0) {
+ rc = opal_config_cpu_idle_state(
+ OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_APPLY);
+ BUG_ON(rc);
+ }
+ }
+
+ if (type == PNV_THREAD_WINKLE) {
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.worc = mfspr(SPRN_WORC);
+
+ sprs.sdr1 = mfspr(SPRN_SDR1);
+ sprs.rpr = mfspr(SPRN_RPR);
+
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ }
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs_saved = true;
+
+ /*
+ * Increment winkle counter and set all winkle bits if
+ * all threads are winkling. This allows wakeup side to
+ * distinguish between fast sleep and winkle state
+ * loss. Fast sleep still has to resync the timebase so
+ * this may not be a really big win.
+ */
+ *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
+ >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
+ == threads_per_core)
+ *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ }
+
+ atomic_unlock_thread_idle();
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.amor = mfspr(SPRN_AMOR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+ }
+
+ local_paca->thread_idle_state = type;
+ srr1 = isa206_idle_insn_mayloss(type); /* go idle */
+ local_paca->thread_idle_state = PNV_THREAD_RUNNING;
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ /*
+ * We don't need an isync after the mtsprs here because
+ * the upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+ }
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ }
+ atomic_unlock_and_stop_thread_idle();
+ }
+ return srr1;
+ }
+
+ /* HV state loss */
+ BUG_ON(type == PNV_THREAD_NAP);
+
+ atomic_lock_thread_idle();
+
+ full_winkle = false;
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ full_winkle = true;
+ BUG_ON(!sprs_saved);
+ }
+ }
+
+ WARN_ON(*state & thread);
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ if (full_winkle) {
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_WORC, sprs.worc);
+ }
+
+ if (power7_fastsleep_workaround_exit) {
+ rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_UNDO);
+ BUG_ON(rc);
+ }
+
+ /* TB */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+
+core_woken:
+ if (!full_winkle)
+ goto subcore_woken;
+
+ if ((*state & local_paca->subcore_sibling_mask) != 0)
+ goto subcore_woken;
+
+ /* Per-subcore SPRs */
+ mtspr(SPRN_SDR1, sprs.sdr1);
+ mtspr(SPRN_RPR, sprs.rpr);
+
+subcore_woken:
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Fast sleep does not lose SPRs */
+ if (!full_winkle)
+ return srr1;
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ }
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ /*
+ * The SLB has to be restored here, but it sometimes still
+ * contains entries, so the __ variant must be used to prevent
+ * multi hits.
+ */
+ __slb_restore_bolted_realmode();
+
+ return srr1;
+}
+
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long power7_offline(void)
+{
+ unsigned long srr1;
+
+ mtmsr(MSR_IDLE);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle. */
+ /******************************************************/
+ /* N O T E W E L L ! ! ! N O T E W E L L */
+ /* The following store to HSTATE_HWTHREAD_STATE(r13) */
+ /* MUST occur in real mode, i.e. with the MMU off, */
+ /* and the MMU must stay off until we clear this flag */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
+ /* pnv_powersave_wakeup in this file. */
+ /* The reason is that another thread can switch the */
+ /* MMU to a guest context whenever this flag is set */
+ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
+ /* that would potentially cause this thread to start */
+ /* executing instructions from guest memory in */
+ /* hypervisor mode, leading to a host crash or data */
+ /* corruption, or worse. */
+ /******************************************************/
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+#endif
__ppc64_runlatch_off();
- srr1 = power7_idle_insn(type);
+ srr1 = power7_idle_insn(power7_offline_type);
__ppc64_runlatch_on();
- fini_irq_for_idle_irqsoff();
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+#endif
+
+ mtmsr(MSR_KERNEL);
return srr1;
}
+#endif
void power7_idle_type(unsigned long type)
{
unsigned long srr1;
- srr1 = __power7_idle_type(type);
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ mtmsr(MSR_IDLE);
+ __ppc64_runlatch_off();
+ srr1 = power7_idle_insn(type);
+ __ppc64_runlatch_on();
+ mtmsr(MSR_KERNEL);
+
+ fini_irq_for_idle_irqsoff();
irq_set_pending_from_srr1(srr1);
}
@@ -347,33 +577,292 @@ void power7_idle(void)
power7_idle_type(PNV_THREAD_NAP);
}
-static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask)
+struct p9_sprs {
+ /* per core */
+ u64 ptcr;
+ u64 rpr;
+ u64 tscr;
+ u64 ldbar;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 pid;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ u64 mmcra;
+ u32 mmcr0;
+ u32 mmcr1;
+ u64 mmcr2;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
{
- unsigned long psscr;
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
+ unsigned long pls;
+ unsigned long mmcr0 = 0;
+ struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
- if (!prep_irq_for_idle_irqsoff())
- return 0;
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ BUG_ON(!mmu_on);
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
+ local_paca->requested_psscr = psscr;
+ /* order setting requested_psscr vs testing dont_stop */
+ smp_mb();
+ if (atomic_read(&local_paca->dont_stop)) {
+ local_paca->requested_psscr = 0;
+ return 0;
+ }
+ }
+#endif
+
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ /*
+ * POWER9 DD2 can incorrectly set PMAO when waking up
+ * after a state-loss idle. Saving and restoring MMCR0
+ * over idle is a workaround.
+ */
+ mmcr0 = mfspr(SPRN_MMCR0);
+ }
+ if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ sprs.pid = mfspr(SPRN_PID);
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs.mmcra = mfspr(SPRN_MMCRA);
+ sprs.mmcr0 = mfspr(SPRN_MMCR0);
+ sprs.mmcr1 = mfspr(SPRN_MMCR1);
+ sprs.mmcr2 = mfspr(SPRN_MMCR2);
+
+ sprs.ptcr = mfspr(SPRN_PTCR);
+ sprs.rpr = mfspr(SPRN_RPR);
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.ldbar = mfspr(SPRN_LDBAR);
+
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.amor = mfspr(SPRN_AMOR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->requested_psscr = 0;
+#endif
psscr = mfspr(SPRN_PSSCR);
- psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ unsigned long mmcra;
+
+ /*
+ * We don't need an isync after the mtsprs here because the
+ * upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+
+ /*
+ * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
+ * might have been corrupted and needs flushing. We also need
+ * to reload MMCR0 (see mmcr0 comment above).
+ */
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ asm volatile(PPC_INVALIDATE_ERAT);
+ mtspr(SPRN_MMCR0, mmcr0);
+ }
+
+ /*
+ * DD2.2 and earlier need to set then clear bit 60 in MMCRA
+ * to ensure the PMU starts running.
+ */
+ mmcra = mfspr(SPRN_MMCRA);
+ mmcra |= PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ mmcra &= ~PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER9, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < pnv_first_spr_loss_level)) {
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ mtspr(SPRN_PTCR, sprs.ptcr);
+ mtspr(SPRN_RPR, sprs.rpr);
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_LDBAR, sprs.ldbar);
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ mtspr(SPRN_PID, sprs.pid);
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_MMCRA, sprs.mmcra);
+ mtspr(SPRN_MMCR0, sprs.mmcr0);
+ mtspr(SPRN_MMCR1, sprs.mmcr1);
+ mtspr(SPRN_MMCR2, sprs.mmcr2);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ if (mmu_on)
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long power9_offline_stop(unsigned long psscr)
+{
+ unsigned long srr1;
+
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr);
+ srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
+#else
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ *
+ * kvm_start_guest must still be called in real mode though, hence
+ * the false argument.
+ */
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
- fini_irq_for_idle_irqsoff();
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr, false);
+ __ppc64_runlatch_on();
+
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+ mtmsr(MSR_KERNEL);
+#endif
return srr1;
}
+#endif
void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask)
{
+ unsigned long psscr;
unsigned long srr1;
- srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
irq_set_pending_from_srr1(srr1);
}
@@ -409,7 +898,7 @@ void pnv_power9_force_smt4_catch(void)
atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
}
/* order setting dont_stop vs testing requested_psscr */
- mb();
+ smp_mb();
for (thr = 0; thr < threads_per_core; ++thr) {
if (!paca_ptrs[cpu0+thr]->requested_psscr)
++awake_threads;
@@ -481,7 +970,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
- u32 idle_states = pnv_get_supported_cpuidle_states();
__ppc64_runlatch_off();
@@ -492,15 +980,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
srr1 = power9_offline_stop(psscr);
-
- } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
- (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
- srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
- } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
- (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
- } else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_idle_insn(PNV_THREAD_NAP);
+ } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
+ srr1 = power7_offline();
} else {
/* This is the fallback method. We emulate snooze */
while (!generic_check_cpu_restart(cpu)) {
@@ -596,33 +1077,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static int __init pnv_power9_idle_init(void)
+static void __init pnv_power9_idle_init(void)
{
u64 max_residency_ns = 0;
int i;
/*
- * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
- * and the pnv_default_stop_{val,mask}.
- *
- * pnv_first_deep_stop_state should be set to the first stop
- * level to cause hypervisor state loss.
- *
* pnv_deepest_stop_{val,mask} should be set to values corresponding to
* the deepest stop state.
*
* pnv_default_stop_{val,mask} should be set to values corresponding to
- * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
+ * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
*/
- pnv_first_deep_stop_state = MAX_STOP_STATE;
+ pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+ pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
for (i = 0; i < nr_pnv_idle_states; i++) {
int err;
struct pnv_idle_states_t *state = &pnv_idle_states[i];
u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+ (pnv_first_tb_loss_level > psscr_rl))
+ pnv_first_tb_loss_level = psscr_rl;
+
if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
- pnv_first_deep_stop_state > psscr_rl)
- pnv_first_deep_stop_state = psscr_rl;
+ (pnv_first_spr_loss_level > psscr_rl))
+ pnv_first_spr_loss_level = psscr_rl;
+
+ /*
+ * The idle code does not deal with TB loss occurring
+ * in a shallower state than SPR loss, so force it to
+ * behave like SPRs are lost if TB is lost. POWER9 would
+ * never encouter this, but a POWER8 core would if it
+ * implemented the stop instruction. So this is for forward
+ * compatibility.
+ */
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+ (pnv_first_spr_loss_level > psscr_rl))
+ pnv_first_spr_loss_level = psscr_rl;
err = validate_psscr_val_mask(&state->psscr_val,
&state->psscr_mask,
@@ -647,6 +1139,7 @@ static int __init pnv_power9_idle_init(void)
pnv_default_stop_val = state->psscr_val;
pnv_default_stop_mask = state->psscr_mask;
default_stop_found = true;
+ WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
}
}
@@ -666,10 +1159,40 @@ static int __init pnv_power9_idle_init(void)
pnv_deepest_stop_psscr_mask);
}
- pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
- pnv_first_deep_stop_state);
+ pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n",
+ pnv_first_spr_loss_level);
- return 0;
+ pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n",
+ pnv_first_tb_loss_level);
+}
+
+static void __init pnv_disable_deep_states(void)
+{
+ /*
+ * The stop-api is unable to restore hypervisor
+ * resources on wakeup from platform idle states which
+ * lose full context. So disable such states.
+ */
+ supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+ pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+ pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+ /*
+ * Use the default stop state for CPU-Hotplug
+ * if available.
+ */
+ if (default_stop_found) {
+ pnv_deepest_stop_psscr_val = pnv_default_stop_val;
+ pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
+ pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+ pnv_deepest_stop_psscr_val);
+ } else { /* Fallback to snooze loop for CPU-Hotplug */
+ deepest_stop_found = false;
+ pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+ }
+ }
}
/*
@@ -684,10 +1207,8 @@ static void __init pnv_probe_idle_states(void)
return;
}
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (pnv_power9_idle_init())
- return;
- }
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pnv_power9_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -807,11 +1328,33 @@ out:
static int __init pnv_init_idle_states(void)
{
+ int cpu;
int rc = 0;
- supported_cpuidle_states = 0;
+
+ /* Set up PACA fields */
+ for_each_present_cpu(cpu) {
+ struct paca_struct *p = paca_ptrs[cpu];
+
+ p->idle_state = 0;
+ if (cpu == cpu_first_thread_sibling(cpu))
+ p->idle_state = (1 << threads_per_core) - 1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* P7/P8 nap */
+ p->thread_idle_state = PNV_THREAD_RUNNING;
+ } else {
+ /* P9 stop */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ p->requested_psscr = 0;
+ atomic_set(&p->dont_stop, 0);
+#endif
+ }
+ }
/* In case we error out nr_pnv_idle_states will be zero */
nr_pnv_idle_states = 0;
+ supported_cpuidle_states = 0;
+
if (cpuidle_disable != IDLE_NO_OVERRIDE)
goto out;
rc = pnv_parse_cpuidle_dt();
@@ -819,27 +1362,40 @@ static int __init pnv_init_idle_states(void)
return rc;
pnv_probe_idle_states();
- if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- } else {
- /*
- * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
- * workaround is needed to use fastsleep. Provide sysfs
- * control to choose how this workaround has to be applied.
- */
- device_create_file(cpu_subsys.dev_root,
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ power7_fastsleep_workaround_entry = false;
+ power7_fastsleep_workaround_exit = false;
+ } else {
+ /*
+ * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+ * workaround is needed to use fastsleep. Provide sysfs
+ * control to choose how this workaround has to be
+ * applied.
+ */
+ device_create_file(cpu_subsys.dev_root,
&dev_attr_fastsleep_workaround_applyonce);
- }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
+ ppc_md.power_save = power7_idle;
+ power7_offline_type = PNV_THREAD_NAP;
+ }
- pnv_alloc_idle_core_states();
+ if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
+ (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
+ power7_offline_type = PNV_THREAD_WINKLE;
+ else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ power7_offline_type = PNV_THREAD_SLEEP;
+ }
- if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
- ppc_md.power_save = power7_idle;
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+ if (pnv_save_sprs_for_deep_states())
+ pnv_disable_deep_states();
+ }
out:
return 0;
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index daad8c45c8e7..36c8fa3647a2 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
#define OPAL_CALL(name, opcode) \
int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7); \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
{ \
return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
@@ -218,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2);
OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
@@ -260,6 +263,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
+OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
+OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 58a07948c76e..3e497b91d210 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
nr_chips))
goto error;
- pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+ pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info),
GFP_KERNEL);
if (!pmu_ptr->mem_info)
goto error;
@@ -284,6 +284,9 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
case IMC_TYPE_THREAD:
domain = IMC_DOMAIN_THREAD;
break;
+ case IMC_TYPE_TRACE:
+ domain = IMC_DOMAIN_TRACE;
+ break;
default:
pr_warn("IMC Unknown Device type \n");
domain = -1;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2b0eca104f86..f2b063b027f0 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -505,7 +505,7 @@ static int opal_recover_mce(struct pt_regs *regs,
recovered = 0;
}
- if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
+ if (!recovered && evt->sync_error) {
/*
* Try to kill processes if we get a synchronous machine check
* (e.g., one caused by execution of this instruction). This
@@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs)
return 0;
}
+int opal_hmi_exception_early2(struct pt_regs *regs)
+{
+ s64 rc;
+ __be64 out_flags;
+
+ /*
+ * call opal hmi handler.
+ * Check 64-bit flag mask to find out if an event was generated,
+ * and whether TB is still valid or not etc.
+ */
+ rc = opal_handle_hmi2(&out_flags);
+ if (rc != OPAL_SUCCESS)
+ return 0;
+
+ if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
+ local_paca->hmi_event_available = 1;
+ if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
+ tb_invalid = true;
+ return 1;
+}
+
/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3ead4c237ed0..126602b4e399 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -847,11 +847,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
if (rc)
- pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
+ pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
if (rc)
- pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+ pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc);
pe->pbus = NULL;
pe->pdev = NULL;
@@ -1174,11 +1174,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe->rid = bus->busn_res.start << 8;
if (all)
- pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n",
- bus->busn_res.start, bus->busn_res.end, pe->pe_number);
+ pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n",
+ &bus->busn_res.start, &bus->busn_res.end,
+ pe->pe_number);
else
- pe_info(pe, "Secondary bus %d associated with PE#%x\n",
- bus->busn_res.start, pe->pe_number);
+ pe_info(pe, "Secondary bus %pad associated with PE#%x\n",
+ &bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
@@ -1448,7 +1449,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
tbl = pe->table_group.tables[0];
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
pnv_pci_ioda2_set_bypass(pe, false);
if (pe->table_group.group) {
@@ -1836,7 +1837,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
struct pnv_ioda_pe *pe;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENODEV;
+ return false;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
@@ -1859,7 +1860,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
/* Configure the bypass mode */
s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
if (rc)
- return rc;
+ return false;
/* 4GB offset bypasses 32-bit space */
pdev->dev.archdata.dma_offset = (1ULL << 32);
return true;
@@ -2286,8 +2287,8 @@ found:
__pa(addr) + tce32_segsz * i,
tce32_segsz, IOMMU_PAGE_SIZE_4K);
if (rc) {
- pe_err(pe, " Failed to configure 32-bit TCE table,"
- " err %ld\n", rc);
+ pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n",
+ rc);
goto fail;
}
}
@@ -2332,9 +2333,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
const __u64 win_size = tbl->it_size << tbl->it_page_shift;
- pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num,
- start_addr, start_addr + win_size - 1,
- IOMMU_PAGE_SIZE(tbl));
+ pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n",
+ num, start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
/*
* Map TCE table through TVT. The TVE index is the PE number
@@ -2348,7 +2349,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
size << 3,
IOMMU_PAGE_SIZE(tbl));
if (rc) {
- pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
+ pe_err(pe, "Failed to configure TCE table, err %lld\n", rc);
return rc;
}
@@ -3450,7 +3451,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
#ifdef CONFIG_IOMMU_API
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
#endif
pnv_pci_ioda2_set_bypass(pe, false);
@@ -3484,7 +3485,7 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
phb->ioda.reserved_pe_idx, win, 0, idx);
if (rc != OPAL_SUCCESS)
- pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n",
+ pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
rc, win, idx);
map[idx] = IODA_INVALID_PE;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..be26ab3d99e0 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -2,6 +2,7 @@
#ifndef __POWERNV_PCI_H
#define __POWERNV_PCI_H
+#include <linux/compiler.h> /* for __printf */
#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/msi_bitmap.h>
@@ -204,6 +205,7 @@ extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
+__printf(3, 4)
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
#define pe_err(pe, fmt, ...) \
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 14befee4b3f1..3cf40f689aac 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void)
/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
- ppc_md.hmi_exception_early = opal_hmi_exception_early;
+ if (opal_check_token(OPAL_HANDLE_HMI2))
+ ppc_md.hmi_exception_early = opal_hmi_exception_early2;
+ else
+ ppc_md.hmi_exception_early = opal_hmi_exception_early;
ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 45563004feda..1d7a9fd30dd1 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -183,7 +183,7 @@ static void unsplit_core(void)
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
while (mfspr(SPRN_HID0) & mask)
- power7_idle_insn(PNV_THREAD_NAP);
+ power7_idle_type(PNV_THREAD_NAP);
per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
return;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d291b618a559..47087832f8b2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
@@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
block_sz = pseries_memory_block_size();
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
@@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
@@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
return rc;
}
+ lmb_set_nid(lmb);
block_sz = memory_block_size_bytes();
- /* Find the node id for this address */
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
-
/* Add the memory */
- rc = __add_memory(nid, lmb->base_addr, block_sz);
+ rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
@@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = dlpar_online_lmb(lmb);
if (rc) {
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 36eb1ddbac69..03bbb299320e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
unsigned long attrs)
{
u64 proto_tce;
- __be64 *tcep, *tces;
+ __be64 *tcep;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
@@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+ __be64 *tcep;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--)
*(tcep++) = 0;
@@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
- int ranges, n_mem_addr_cells, n_mem_size_cells, len;
+ int n_mem_addr_cells, n_mem_size_cells, len;
const __be32 *memcell_buf;
memcell_buf = of_get_property(memory, "reg", &len);
@@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
n_mem_addr_cells = of_n_addr_cells(memory);
n_mem_size_cells = of_n_size_cells(memory);
- /* ranges in cell */
- ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-
start = of_read_number(memcell_buf, n_mem_addr_cells);
memcell_buf += n_mem_addr_cells;
size = of_read_number(memcell_buf, n_mem_size_cells);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f2a9f0adc2d3..1034ef1fe2b4 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
break;
case H_PARAMETER:
+ pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
return -EINVAL;
case H_RESOURCE:
+ pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
return -EPERM;
default:
pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
@@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
if (rc != 0) {
switch (state.commit_rc) {
case H_PTEG_FULL:
- pr_warn("Hash collision while resizing HPT\n");
return -ENOSPC;
default:
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index 27f0a915c8a9..f860a897a9e0 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index)
int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
{
- u32 count, drc_index;
+ u32 drc_index;
int rc;
/* slim chance, but we might get a hotplug event while booting */
@@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
return -EINVAL;
}
- count = hp_elog->_drc_u.drc_count;
drc_index = hp_elog->_drc_u.drc_index;
lock_device_hotplug();
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 452dcfd7e5dd..c97d15352f9f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -539,44 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs,
int disposition = rtas_error_disposition(errp);
static const char * const initiators[] = {
- "Unknown",
- "CPU",
- "PCI",
- "ISA",
- "Memory",
- "Power Mgmt",
+ [0] = "Unknown",
+ [1] = "CPU",
+ [2] = "PCI",
+ [3] = "ISA",
+ [4] = "Memory",
+ [5] = "Power Mgmt",
};
static const char * const mc_err_types[] = {
- "UE",
- "SLB",
- "ERAT",
- "Unknown",
- "TLB",
- "D-Cache",
- "Unknown",
- "I-Cache",
+ [0] = "UE",
+ [1] = "SLB",
+ [2] = "ERAT",
+ [3] = "Unknown",
+ [4] = "TLB",
+ [5] = "D-Cache",
+ [6] = "Unknown",
+ [7] = "I-Cache",
};
static const char * const mc_ue_types[] = {
- "Indeterminate",
- "Instruction fetch",
- "Page table walk ifetch",
- "Load/Store",
- "Page table walk Load/Store",
+ [0] = "Indeterminate",
+ [1] = "Instruction fetch",
+ [2] = "Page table walk ifetch",
+ [3] = "Load/Store",
+ [4] = "Page table walk Load/Store",
};
/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
static const char * const mc_slb_types[] = {
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Parity",
+ [1] = "Multihit",
+ [2] = "Indeterminate",
};
/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
static const char * const mc_soft_types[] = {
- "Unknown",
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Unknown",
+ [1] = "Parity",
+ [2] = "Multihit",
+ [3] = "Indeterminate",
};
if (!rtas_error_extended(errp)) {
@@ -707,6 +707,87 @@ out:
return disposition;
}
+#ifdef CONFIG_MEMORY_FAILURE
+
+static DEFINE_PER_CPU(int, rtas_ue_count);
+static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
+
+static void pseries_hwpoison_work_fn(struct work_struct *work)
+{
+ unsigned long paddr;
+ int index;
+
+ while (__this_cpu_read(rtas_ue_count) > 0) {
+ index = __this_cpu_read(rtas_ue_count) - 1;
+ paddr = __this_cpu_read(rtas_ue_paddr[index]);
+ memory_failure(paddr >> PAGE_SHIFT, 0);
+ __this_cpu_dec(rtas_ue_count);
+ }
+}
+
+static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
+
+static void queue_ue_paddr(unsigned long paddr)
+{
+ int index;
+
+ index = __this_cpu_inc_return(rtas_ue_count) - 1;
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(rtas_ue_count);
+ return;
+ }
+ this_cpu_write(rtas_ue_paddr[index], paddr);
+ schedule_work(&hwpoison_work);
+}
+
+static void pseries_do_memory_failure(struct pt_regs *regs,
+ struct pseries_mc_errorlog *mce_log)
+{
+ unsigned long paddr;
+
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
+
+ pfn = addr_to_pfn(regs,
+ be64_to_cpu(mce_log->effective_address));
+ if (pfn == ULONG_MAX)
+ return;
+ paddr = pfn << PAGE_SHIFT;
+ } else {
+ return;
+ }
+ queue_ue_paddr(paddr);
+}
+
+static void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp)
+{
+ struct pseries_errorlog *pseries_log;
+ struct pseries_mc_errorlog *mce_log;
+
+ if (!rtas_error_extended(errp))
+ return;
+
+ pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+ if (!pseries_log)
+ return;
+
+ mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+ if (mce_log->error_type == MC_ERROR_TYPE_UE)
+ pseries_do_memory_failure(regs, mce_log);
+}
+#else
+static inline void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp) { }
+#endif /*CONFIG_MEMORY_FAILURE */
+
/*
* Process MCE rtas errlog event.
*/
@@ -765,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
recovered = 1;
}
+ pseries_process_ue(regs, err);
+
/* Queue irq work to log this rtas event later. */
irq_work_queue(&mce_errlog_process_work);