diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv/idle.c')
| -rw-r--r-- | arch/powerpc/platforms/powernv/idle.c | 1331 |
1 files changed, 1074 insertions, 257 deletions
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee070373f..d98b933e4984 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PowerNV cpuidle code * * Copyright 2015 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> @@ -17,13 +13,15 @@ #include <linux/cpu.h> #include <asm/firmware.h> +#include <asm/interrupt.h> #include <asm/machdep.h> #include <asm/opal.h> #include <asm/cputhreads.h> #include <asm/cpuidle.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/smp.h> #include <asm/runlatch.h> +#include <asm/dbell.h> #include "powernv.h" #include "subcore.h" @@ -35,6 +33,8 @@ #define P9_STOP_SPR_PSSCR 855 static u32 supported_cpuidle_states; +struct pnv_idle_states_t *pnv_idle_states; +int nr_pnv_idle_states; /* * The default stop state that will be used by ppc_md.power_save @@ -45,10 +45,10 @@ static u64 pnv_default_stop_mask; static bool default_stop_found; /* - * First deep stop state. Used to figure out when to save/restore - * hypervisor context. + * First stop state levels when SPR and TB loss can occur. */ -u64 pnv_first_deep_stop_state = MAX_STOP_STATE; +static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; +static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; /* * psscr value and mask of the deepest stop idle state. @@ -56,9 +56,12 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE; */ static u64 pnv_deepest_stop_psscr_val; static u64 pnv_deepest_stop_psscr_mask; +static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; -static int pnv_save_sprs_for_deep_states(void) +static unsigned long power7_offline_type; + +static int __init pnv_save_sprs_for_deep_states(void) { int cpu; int rc; @@ -68,18 +71,15 @@ static int pnv_save_sprs_for_deep_states(void) * all cpus at boot. Get these reg values of current cpu and use the * same across all cpus. */ - uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; - uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); - uint64_t hmeer_val = mfspr(SPRN_HMEER); + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hmeer_val = mfspr(SPRN_HMEER); uint64_t msr_val = MSR_IDLE; uint64_t psscr_val = pnv_deepest_stop_psscr_val; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { uint64_t pir = get_hard_smp_processor_id(cpu); - uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); if (rc != 0) @@ -112,8 +112,11 @@ static int pnv_save_sprs_for_deep_states(void) if (rc != 0) return rc; - /* Only p8 needs to set extra HID regiters */ + /* Only p8 needs to set extra HID registers */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); if (rc != 0) @@ -133,62 +136,6 @@ static int pnv_save_sprs_for_deep_states(void) return 0; } -static void pnv_alloc_idle_core_states(void) -{ - int i, j; - int nr_cores = cpu_nr_cores(); - u32 *core_idle_state; - - /* - * core_idle_state - The lower 8 bits track the idle state of - * each thread of the core. - * - * The most significant bit is the lock bit. - * - * Initially all the bits corresponding to threads_per_core - * are set. They are cleared when the thread enters deep idle - * state like sleep and winkle/stop. - * - * Initially the lock bit is cleared. The lock bit has 2 - * purposes: - * a. While the first thread in the core waking up from - * idle is restoring core state, it prevents other - * threads in the core from switching to process - * context. - * b. While the last thread in the core is saving the - * core state, it prevents a different thread from - * waking up. - */ - for (i = 0; i < nr_cores; i++) { - int first_cpu = i * threads_per_core; - int node = cpu_to_node(first_cpu); - size_t paca_ptr_array_size; - - core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); - *core_idle_state = (1 << threads_per_core) - 1; - paca_ptr_array_size = (threads_per_core * - sizeof(struct paca_struct *)); - - for (j = 0; j < threads_per_core; j++) { - int cpu = first_cpu + j; - - paca[cpu].core_idle_state_ptr = core_idle_state; - paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; - paca[cpu].thread_mask = 1 << j; - if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) - continue; - paca[cpu].thread_sibling_pacas = - kmalloc_node(paca_ptr_array_size, - GFP_KERNEL, node); - } - } - - update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) - pnv_save_sprs_for_deep_states(); -} - u32 pnv_get_supported_cpuidle_states(void) { return supported_cpuidle_states; @@ -198,15 +145,22 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); static void pnv_fastsleep_workaround_apply(void *info) { + int cpu = smp_processor_id(); int rc; int *err = info; + if (cpu_first_thread_sibling(cpu) != cpu) + return; + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, OPAL_CONFIG_IDLE_APPLY); if (rc) *err = 1; } +static bool power7_fastsleep_workaround_entry = true; +static bool power7_fastsleep_workaround_exit = true; + /* * Used to store fastsleep workaround state * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) @@ -224,7 +178,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_t primary_thread_mask; int err; u8 val; @@ -238,40 +191,25 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, * fastsleep_workaround_applyonce = 1 implies * fastsleep workaround needs to be left in 'applied' state on all * the cores. Do this by- - * 1. Patching out the call to 'undo' workaround in fastsleep exit path - * 2. Sending ipi to all the cores which have at least one online thread - * 3. Patching out the call to 'apply' workaround in fastsleep entry - * path + * 1. Disable the 'undo' workaround in fastsleep exit path + * 2. Sendi IPIs to all the cores which have at least one online thread + * 3. Disable the 'apply' workaround in fastsleep entry path + * * There is no need to send ipi to cores which have all threads * offlined, as last thread of the core entering fastsleep or deeper * state would have applied workaround. */ - err = patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - if (err) { - pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); - goto fail; - } + power7_fastsleep_workaround_exit = false; - get_online_cpus(); - primary_thread_mask = cpu_online_cores_map(); - on_each_cpu_mask(&primary_thread_mask, - pnv_fastsleep_workaround_apply, - &err, 1); - put_online_cpus(); + cpus_read_lock(); + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); + cpus_read_unlock(); if (err) { pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); goto fail; } - err = patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - if (err) { - pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); - goto fail; - } + power7_fastsleep_workaround_entry = false; fastsleep_workaround_applyonce = 1; @@ -284,31 +222,353 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, show_fastsleep_workaround_applyonce, store_fastsleep_workaround_applyonce); -static unsigned long __power7_idle_type(unsigned long type) +static inline void atomic_start_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + clear_bit(thread_nr, state); +} + +static inline void atomic_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + set_bit(thread_nr, state); +} + +static inline void atomic_lock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *lock = &paca_ptrs[first]->idle_lock; + + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) + barrier(); +} + +static inline void atomic_unlock_and_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long *lock = &paca_ptrs[first]->idle_lock; + u64 s = READ_ONCE(*state); + u64 new, tmp; + + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); + BUG_ON(s & thread); + +again: + new = s | thread; + tmp = cmpxchg(state, s, new); + if (unlikely(tmp != s)) { + s = tmp; + goto again; + } + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); +} + +static inline void atomic_unlock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *lock = &paca_ptrs[first]->idle_lock; + + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); +} + +/* P7 and P8 */ +struct p7_sprs { + /* per core */ + u64 tscr; + u64 worc; + + /* per subcore */ + u64 sdr1; + u64 rpr; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 purr; + u64 spurr; + u64 dscr; + u64 wort; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 uamor; + /* amor is restored to constant ~0 */ +}; + +static unsigned long power7_idle_insn(unsigned long type) { + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; unsigned long srr1; + bool full_winkle; + struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ + bool sprs_saved = false; + int rc; - if (!prep_irq_for_idle_irqsoff()) - return 0; + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + + BUG_ON(!(*state & thread)); + *state &= ~thread; + + if (power7_fastsleep_workaround_entry) { + if ((*state & core_thread_mask) == 0) { + rc = opal_config_cpu_idle_state( + OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + BUG_ON(rc); + } + } + + if (type == PNV_THREAD_WINKLE) { + sprs.tscr = mfspr(SPRN_TSCR); + sprs.worc = mfspr(SPRN_WORC); + + sprs.sdr1 = mfspr(SPRN_SDR1); + sprs.rpr = mfspr(SPRN_RPR); + + sprs.lpcr = mfspr(SPRN_LPCR); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + } + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.wort = mfspr(SPRN_WORT); + + sprs_saved = true; + + /* + * Increment winkle counter and set all winkle bits if + * all threads are winkling. This allows wakeup side to + * distinguish between fast sleep and winkle state + * loss. Fast sleep still has to resync the timebase so + * this may not be a really big win. + */ + *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) + >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT + == threads_per_core) + *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + } + + atomic_unlock_thread_idle(); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.uamor = mfspr(SPRN_UAMOR); + } + + local_paca->thread_idle_state = type; + srr1 = isa206_idle_insn_mayloss(type); /* go idle */ + local_paca->thread_idle_state = PNV_THREAD_RUNNING; + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + /* + * We don't need an isync after the mtsprs here because + * the upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, ~0); + mtspr(SPRN_UAMOR, sprs.uamor); + } + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + } + atomic_unlock_and_stop_thread_idle(); + } + return srr1; + } + + /* HV state loss */ + BUG_ON(type == PNV_THREAD_NAP); + + atomic_lock_thread_idle(); + + full_winkle = false; + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + full_winkle = true; + BUG_ON(!sprs_saved); + } + } + + WARN_ON(*state & thread); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + if (full_winkle) { + mtspr(SPRN_TSCR, sprs.tscr); + mtspr(SPRN_WORC, sprs.worc); + } + + if (power7_fastsleep_workaround_exit) { + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_UNDO); + BUG_ON(rc); + } + + /* TB */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + +core_woken: + if (!full_winkle) + goto subcore_woken; + + if ((*state & local_paca->subcore_sibling_mask) != 0) + goto subcore_woken; + + /* Per-subcore SPRs */ + mtspr(SPRN_SDR1, sprs.sdr1); + mtspr(SPRN_RPR, sprs.rpr); + +subcore_woken: + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + atomic_unlock_and_stop_thread_idle(); + + /* Fast sleep does not lose SPRs */ + if (!full_winkle) + return srr1; + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + } + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_WORT, sprs.wort); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + +#ifdef CONFIG_PPC_64S_HASH_MMU + /* + * The SLB has to be restored here, but it sometimes still + * contains entries, so the __ variant must be used to prevent + * multi hits. + */ + __slb_restore_bolted_realmode(); +#endif + + return srr1; +} + +extern unsigned long idle_kvm_start_guest(unsigned long srr1); + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long power7_offline(void) +{ + unsigned long srr1; + + mtmsr(MSR_IDLE); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle. */ + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in */ + /* pnv_powersave_wakeup in this file. */ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; +#endif __ppc64_runlatch_off(); - srr1 = power7_idle_insn(type); + srr1 = power7_idle_insn(power7_offline_type); __ppc64_runlatch_on(); - fini_irq_for_idle_irqsoff(); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); +#endif + + mtmsr(MSR_KERNEL); return srr1; } +#endif void power7_idle_type(unsigned long type) { unsigned long srr1; - srr1 = __power7_idle_type(type); + if (!prep_irq_for_idle_irqsoff()) + return; + + mtmsr(MSR_IDLE); + __ppc64_runlatch_off(); + srr1 = power7_idle_insn(type); + __ppc64_runlatch_on(); + mtmsr(MSR_KERNEL); + + fini_irq_for_idle_irqsoff(); irq_set_pending_from_srr1(srr1); } -void power7_idle(void) +static void power7_idle(void) { if (!powersave_nap) return; @@ -316,45 +576,481 @@ void power7_idle(void) power7_idle_type(PNV_THREAD_NAP); } -static unsigned long __power9_idle_type(unsigned long stop_psscr_val, - unsigned long stop_psscr_mask) +struct p9_sprs { + /* per core */ + u64 ptcr; + u64 rpr; + u64 tscr; + u64 ldbar; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 pid; + u64 purr; + u64 spurr; + u64 dscr; + u64 ciabr; + + u64 mmcra; + u32 mmcr0; + u32 mmcr1; + u64 mmcr2; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 amor; + u64 uamor; +}; + +static unsigned long power9_idle_stop(unsigned long psscr) { - unsigned long psscr; + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; unsigned long srr1; + unsigned long pls; + unsigned long mmcr0 = 0; + unsigned long mmcra = 0; + struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; - if (!prep_irq_for_idle_irqsoff()) - return 0; + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { + local_paca->requested_psscr = psscr; + /* order setting requested_psscr vs testing dont_stop */ + smp_mb(); + if (atomic_read(&local_paca->dont_stop)) { + local_paca->requested_psscr = 0; + return 0; + } + } +#endif + + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + /* + * POWER9 DD2 can incorrectly set PMAO when waking up + * after a state-loss idle. Saving and restoring MMCR0 + * over idle is a workaround. + */ + mmcr0 = mfspr(SPRN_MMCR0); + } + + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { + sprs.lpcr = mfspr(SPRN_LPCR); + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + sprs.pid = mfspr(SPRN_PID); + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.ciabr = mfspr(SPRN_CIABR); + + sprs.mmcra = mfspr(SPRN_MMCRA); + sprs.mmcr0 = mfspr(SPRN_MMCR0); + sprs.mmcr1 = mfspr(SPRN_MMCR1); + sprs.mmcr2 = mfspr(SPRN_MMCR2); + + sprs.ptcr = mfspr(SPRN_PTCR); + sprs.rpr = mfspr(SPRN_RPR); + sprs.tscr = mfspr(SPRN_TSCR); + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + sprs.ldbar = mfspr(SPRN_LDBAR); + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.uamor = mfspr(SPRN_UAMOR); + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->requested_psscr = 0; +#endif psscr = mfspr(SPRN_PSSCR); - psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr); - __ppc64_runlatch_on(); + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); - fini_irq_for_idle_irqsoff(); + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + /* + * We don't need an isync after the mtsprs here because the + * upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, ~0); + mtspr(SPRN_UAMOR, sprs.uamor); + + /* + * Workaround for POWER9 DD2.0, if we lost resources, the ERAT + * might have been corrupted and needs flushing. We also need + * to reload MMCR0 (see mmcr0 comment above). + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); + mtspr(SPRN_MMCR0, mmcr0); + } + + /* + * DD2.2 and earlier need to set then clear bit 60 in MMCRA + * to ensure the PMU starts running. + */ + mmcra = mfspr(SPRN_MMCRA); + mmcra |= PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + mmcra &= ~PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER9, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < deep_spr_loss_state)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + mtspr(SPRN_PTCR, sprs.ptcr); + mtspr(SPRN_RPR, sprs.rpr); + mtspr(SPRN_TSCR, sprs.tscr); + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + mtspr(SPRN_PID, sprs.pid); + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_CIABR, sprs.ciabr); + + mtspr(SPRN_MMCRA, sprs.mmcra); + mtspr(SPRN_MMCR0, sprs.mmcr0); + mtspr(SPRN_MMCR1, sprs.mmcr1); + mtspr(SPRN_MMCR2, sprs.mmcr2); + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + mtspr(SPRN_LDBAR, sprs.ldbar); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + mtmsr(MSR_KERNEL); return srr1; } -void power9_idle_type(unsigned long stop_psscr_val, +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * This is used in working around bugs in thread reconfiguration + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional + * memory and the way that XER[SO] is checkpointed. + * This function forces the core into SMT4 in order by asking + * all other threads not to stop, and sending a message to any + * that are in a stop state. + * Must be called with preemption disabled. + */ +void pnv_power9_force_smt4_catch(void) +{ + int cpu, cpu0, thr; + int awake_threads = 1; /* this thread is awake */ + int poke_threads = 0; + int need_awake = threads_per_core; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); + } + /* order setting dont_stop vs testing requested_psscr */ + smp_mb(); + for (thr = 0; thr < threads_per_core; ++thr) { + if (!paca_ptrs[cpu0+thr]->requested_psscr) + ++awake_threads; + else + poke_threads |= (1 << thr); + } + + /* If at least 3 threads are awake, the core is in SMT4 already */ + if (awake_threads < need_awake) { + /* We have to wake some threads; we'll use msgsnd */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (poke_threads & (1 << thr)) { + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, + paca_ptrs[cpu0+thr]->hw_cpu_id); + } + } + /* now spin until at least 3 threads are awake */ + do { + for (thr = 0; thr < threads_per_core; ++thr) { + if ((poke_threads & (1 << thr)) && + !paca_ptrs[cpu0+thr]->requested_psscr) { + ++awake_threads; + poke_threads &= ~(1 << thr); + } + } + } while (awake_threads < need_awake); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); + +void pnv_power9_force_smt4_release(void) +{ + int cpu, cpu0, thr; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + + /* clear all the dont_stop flags */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + +struct p10_sprs { + /* + * SPRs that get lost in shallow states: + * + * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 + * isa300 idle routines restore CR, LR. + * CTR is volatile + * idle thread doesn't use FP or VEC + * kernel doesn't use TAR + * HSPRG1 is only live in HV interrupt entry + * SPRG2 is only live in KVM guests, KVM handles it. + */ +}; + +static unsigned long power10_idle_stop(unsigned long psscr) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; + unsigned long srr1; + unsigned long pls; +// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; + + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { + /* XXX: save SPRs for deep state loss here. */ + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + + psscr = mfspr(SPRN_PSSCR); + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER10, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < deep_spr_loss_state)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* XXX: restore per-core SPRs here */ + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* XXX: restore per-thread SPRs here */ + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long arch300_offline_stop(unsigned long psscr) +{ + unsigned long srr1; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr); + else + srr1 = power9_idle_stop(psscr); + + return srr1; +} +#endif + +void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask) { + unsigned long psscr; unsigned long srr1; - srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask); + if (!prep_irq_for_idle_irqsoff()) + return; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr); + else + srr1 = power9_idle_stop(psscr); + __ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + irq_set_pending_from_srr1(srr1); } /* * Used for ppc_md.power_save which needs a function with no parameters */ -void power9_idle(void) +static void arch300_idle(void) { - power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); + arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); } #ifdef CONFIG_HOTPLUG_CPU + +void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) +{ + u64 pir = get_hard_smp_processor_id(cpu); + + mtspr(SPRN_LPCR, lpcr_val); + + /* + * Program the LPCR via stop-api only if the deepest stop state + * can lose hypervisor context. + */ + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) + opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); +} + /* * pnv_cpu_offline: A function that puts the CPU into the deepest * available platform idle state on a CPU-Offline. @@ -363,7 +1059,6 @@ void power9_idle(void) unsigned long pnv_cpu_offline(unsigned int cpu) { unsigned long srr1; - u32 idle_states = pnv_get_supported_cpuidle_states(); __ppc64_runlatch_off(); @@ -373,15 +1068,9 @@ unsigned long pnv_cpu_offline(unsigned int cpu) psscr = mfspr(SPRN_PSSCR); psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | pnv_deepest_stop_psscr_val; - srr1 = power9_idle_stop(psscr); - - } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { - srr1 = power7_idle_insn(PNV_THREAD_WINKLE); - } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || - (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - srr1 = power7_idle_insn(PNV_THREAD_SLEEP); - } else if (idle_states & OPAL_PM_NAP_ENABLED) { - srr1 = power7_idle_insn(PNV_THREAD_NAP); + srr1 = arch300_offline_stop(psscr); + } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { + srr1 = power7_offline(); } else { /* This is the fallback method. We emulate snooze */ while (!generic_check_cpu_restart(cpu)) { @@ -435,7 +1124,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu) * stop instruction */ -int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) +int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) { int err = 0; @@ -477,97 +1166,85 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, - int dt_idle_states) +static void __init pnv_arch300_idle_init(void) { - u64 *psscr_val = NULL; - u64 *psscr_mask = NULL; - u32 *residency_ns = NULL; u64 max_residency_ns = 0; - int rc = 0, i; - - psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL); - psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL); - residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns), - GFP_KERNEL); - - if (!psscr_val || !psscr_mask || !residency_ns) { - rc = -1; - goto out; - } - - if (of_property_read_u64_array(np, - "ibm,cpu-idle-state-psscr", - psscr_val, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); - rc = -1; - goto out; - } - - if (of_property_read_u64_array(np, - "ibm,cpu-idle-state-psscr-mask", - psscr_mask, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); - rc = -1; - goto out; - } + int i; - if (of_property_read_u32_array(np, - "ibm,cpu-idle-state-residency-ns", - residency_ns, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); - rc = -1; - goto out; - } + /* stop is not really architected, we only have p9,p10 drivers */ + if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) + return; /* - * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, - * and the pnv_default_stop_{val,mask}. - * - * pnv_first_deep_stop_state should be set to the first stop - * level to cause hypervisor state loss. - * * pnv_deepest_stop_{val,mask} should be set to values corresponding to * the deepest stop state. * * pnv_default_stop_{val,mask} should be set to values corresponding to - * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. + * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. */ - pnv_first_deep_stop_state = MAX_STOP_STATE; - for (i = 0; i < dt_idle_states; i++) { + pnv_first_tb_loss_level = MAX_STOP_STATE + 1; + deep_spr_loss_state = MAX_STOP_STATE + 1; + for (i = 0; i < nr_pnv_idle_states; i++) { int err; - u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK; + struct pnv_idle_states_t *state = &pnv_idle_states[i]; + u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; + + /* No deep loss driver implemented for POWER10 yet */ + if (pvr_version_is(PVR_POWER10) && + state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) + continue; + + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (pnv_first_tb_loss_level > psscr_rl)) + pnv_first_tb_loss_level = psscr_rl; + + if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; - if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && - (pnv_first_deep_stop_state > psscr_rl)) - pnv_first_deep_stop_state = psscr_rl; + /* + * The idle code does not deal with TB loss occurring + * in a shallower state than SPR loss, so force it to + * behave like SPRs are lost if TB is lost. POWER9 would + * never encounter this, but a POWER8 core would if it + * implemented the stop instruction. So this is for forward + * compatibility. + */ + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; - err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i], - flags[i]); + err = validate_psscr_val_mask(&state->psscr_val, + &state->psscr_mask, + state->flags); if (err) { - report_invalid_psscr_val(psscr_val[i], err); + report_invalid_psscr_val(state->psscr_val, err); continue; } - if (max_residency_ns < residency_ns[i]) { - max_residency_ns = residency_ns[i]; - pnv_deepest_stop_psscr_val = psscr_val[i]; - pnv_deepest_stop_psscr_mask = psscr_mask[i]; + state->valid = true; + + if (max_residency_ns < state->residency_ns) { + max_residency_ns = state->residency_ns; + pnv_deepest_stop_psscr_val = state->psscr_val; + pnv_deepest_stop_psscr_mask = state->psscr_mask; + pnv_deepest_stop_flag = state->flags; deepest_stop_found = true; } if (!default_stop_found && - (flags[i] & OPAL_PM_STOP_INST_FAST)) { - pnv_default_stop_val = psscr_val[i]; - pnv_default_stop_mask = psscr_mask[i]; + (state->flags & OPAL_PM_STOP_INST_FAST)) { + pnv_default_stop_val = state->psscr_val; + pnv_default_stop_mask = state->psscr_mask; default_stop_found = true; + WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); } } if (unlikely(!default_stop_found)) { pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); } else { - ppc_md.power_save = power9_idle; + ppc_md.power_save = arch300_idle; pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", pnv_default_stop_val, pnv_default_stop_mask); } @@ -580,13 +1257,40 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, pnv_deepest_stop_psscr_mask); } - pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", - pnv_first_deep_stop_state); -out: - kfree(psscr_val); - kfree(psscr_mask); - kfree(residency_ns); - return rc; + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", + deep_spr_loss_state); + + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", + pnv_first_tb_loss_level); +} + +static void __init pnv_disable_deep_states(void) +{ + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. + */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } } /* @@ -594,95 +1298,208 @@ out: */ static void __init pnv_probe_idle_states(void) { - struct device_node *np; - int dt_idle_states; - u32 *flags = NULL; int i; + if (nr_pnv_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + return; + } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pnv_arch300_idle_init(); + + for (i = 0; i < nr_pnv_idle_states; i++) + supported_cpuidle_states |= pnv_idle_states[i].flags; +} + +/* + * This function parses device-tree and populates all the information + * into pnv_idle_states structure. It also sets up nr_pnv_idle_states + * which is the number of cpuidle states discovered through device-tree. + */ + +static int __init pnv_parse_cpuidle_dt(void) +{ + struct device_node *np; + int nr_idle_states, i; + int rc = 0; + u32 *temp_u32; + u64 *temp_u64; + const char **temp_string; + np = of_find_node_by_path("/ibm,opal/power-mgt"); if (!np) { pr_warn("opal: PowerMgmt Node not found\n"); - goto out; + return -ENODEV; } - dt_idle_states = of_property_count_u32_elems(np, - "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + nr_idle_states = of_property_count_u32_elems(np, + "ibm,cpu-idle-state-flags"); + + pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), + GFP_KERNEL); + temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); + temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); + temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); + + if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { + pr_err("Could not allocate memory for dt parsing\n"); + rc = -ENOMEM; goto out; } - flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL); - - if (of_property_read_u32_array(np, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + /* Read flags */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", + temp_u32, nr_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].flags = temp_u32[i]; + + /* Read latencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + rc = -EINVAL; goto out; } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].latency_ns = temp_u32[i]; + /* Read residencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].residency_ns = temp_u32[i]; + + /* For power9 and later */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_power9_idle_init(np, flags, dt_idle_states)) + /* Read pm_crtl_val */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_val = temp_u64[i]; + + /* Read pm_crtl_mask */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); + rc = -EINVAL; goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_mask = temp_u64[i]; } - for (i = 0; i < dt_idle_states; i++) - supported_cpuidle_states |= flags[i]; + /* + * power8 specific properties ibm,cpu-idle-state-pmicr-mask and + * ibm,cpu-idle-state-pmicr-val were never used and there is no + * plan to use it in near future. Hence, not parsing these properties + */ + if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", + temp_string, nr_idle_states) < 0) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + strscpy(pnv_idle_states[i].name, temp_string[i], + PNV_IDLE_NAME_LEN); + nr_pnv_idle_states = nr_idle_states; + rc = 0; out: - kfree(flags); + kfree(temp_u32); + kfree(temp_u64); + kfree(temp_string); + of_node_put(np); + return rc; } + static int __init pnv_init_idle_states(void) { + int cpu; + int rc = 0; + + /* Set up PACA fields */ + for_each_present_cpu(cpu) { + struct paca_struct *p = paca_ptrs[cpu]; + + p->idle_state = 0; + if (cpu == cpu_first_thread_sibling(cpu)) + p->idle_state = (1 << threads_per_core) - 1; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* P7/P8 nap */ + p->thread_idle_state = PNV_THREAD_RUNNING; + } else if (pvr_version_is(PVR_POWER9)) { + /* P9 stop workarounds */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + p->requested_psscr = 0; + atomic_set(&p->dont_stop, 0); +#endif + } + } + /* In case we error out nr_pnv_idle_states will be zero */ + nr_pnv_idle_states = 0; supported_cpuidle_states = 0; if (cpuidle_disable != IDLE_NO_OVERRIDE) goto out; - + rc = pnv_parse_cpuidle_dt(); + if (rc) + return rc; pnv_probe_idle_states(); - if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - } else { - /* - * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that - * workaround is needed to use fastsleep. Provide sysfs - * control to choose how this workaround has to be applied. - */ - device_create_file(cpu_subsys.dev_root, - &dev_attr_fastsleep_workaround_applyonce); - } - - pnv_alloc_idle_core_states(); - - /* - * For each CPU, record its PACA address in each of it's - * sibling thread's PACA at the slot corresponding to this - * CPU's index in the core. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - int cpu; - - pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n"); - for_each_possible_cpu(cpu) { - int base_cpu = cpu_first_thread_sibling(cpu); - int idx = cpu_thread_in_core(cpu); - int i; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + power7_fastsleep_workaround_entry = false; + power7_fastsleep_workaround_exit = false; + } else { + struct device *dev_root; + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be + * applied. + */ + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + device_create_file(dev_root, + &dev_attr_fastsleep_workaround_applyonce); + put_device(dev_root); + } + } - for (i = 0; i < threads_per_core; i++) { - int j = base_cpu + i; + update_subcore_sibling_mask(); - paca[j].thread_sibling_pacas[idx] = &paca[cpu]; - } + if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { + ppc_md.power_save = power7_idle; + power7_offline_type = PNV_THREAD_NAP; } + + if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && + (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) + power7_offline_type = PNV_THREAD_WINKLE; + else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || + (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + power7_offline_type = PNV_THREAD_SLEEP; } - if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) - ppc_md.power_save = power7_idle; + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + if (pnv_save_sprs_for_deep_states()) + pnv_disable_deep_states(); + } out: return 0; |
