diff options
Diffstat (limited to 'drivers/cpufreq/amd-pstate.c')
-rw-r--r-- | drivers/cpufreq/amd-pstate.c | 1624 |
1 files changed, 871 insertions, 753 deletions
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 2015c9fcc3c9..f3477ab37742 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/bitfield.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -36,7 +37,6 @@ #include <linux/delay.h> #include <linux/uaccess.h> #include <linux/static_call.h> -#include <linux/amd-pstate.h> #include <linux/topology.h> #include <acpi/processor.h> @@ -46,27 +46,47 @@ #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> + +#include "amd-pstate.h" #include "amd-pstate-trace.h" #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 -#define AMD_PSTATE_PREFCORE_THRESHOLD 166 +#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 + +#define AMD_CPPC_EPP_PERFORMANCE 0x00 +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF +#define AMD_CPPC_EPP_POWERSAVE 0xFF + +static const char * const amd_pstate_mode_string[] = { + [AMD_PSTATE_UNDEFINED] = "undefined", + [AMD_PSTATE_DISABLE] = "disable", + [AMD_PSTATE_PASSIVE] = "passive", + [AMD_PSTATE_ACTIVE] = "active", + [AMD_PSTATE_GUIDED] = "guided", + NULL, +}; + +const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode) +{ + if (mode < 0 || mode >= AMD_PSTATE_MAX) + return NULL; + return amd_pstate_mode_string[mode]; +} +EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string); + +struct quirk_entry { + u32 nominal_freq; + u32 lowest_freq; +}; -/* - * TODO: We need more time to fine tune processors with shared memory solution - * with community together. - * - * There are some performance drops on the CPU benchmarks which reports from - * Suse. We are co-working with them to fine tune the shared memory solution. So - * we disable it by default to go acpi-cpufreq on these processors and add a - * module parameter to be able to enable it manually for debugging. - */ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; -static bool cppc_enabled; static bool amd_pstate_prefcore = true; +static struct quirk_entry *quirks; /* * AMD Energy Preference Performance (EPP) @@ -111,6 +131,54 @@ static unsigned int epp_values[] = { typedef int (*cppc_mode_transition_fn)(int); +static struct quirk_entry quirk_amd_7k62 = { + .nominal_freq = 2600, + .lowest_freq = 550, +}; + +static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val) +{ + u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq); + + return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf); +} + +static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val) +{ + return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val, + perf.nominal_perf); +} + +static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) +{ + /** + * match the broken bios for family 17h processor support CPPC V2 + * broken BIOS lack of nominal_freq and lowest_freq capabilities + * definition in ACPI tables + */ + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { + quirks = dmi->driver_data; + pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); + return 1; + } + + return 0; +} + +static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { + { + .callback = dmi_matched_7k62_bios_bug, + .ident = "AMD EPYC 7K62", + .matches = { + DMI_MATCH(DMI_BIOS_VERSION, "5.14"), + DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"), + }, + .driver_data = &quirk_amd_7k62, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -122,231 +190,273 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) return -EINVAL; } -static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static u8 msr_get_epp(struct amd_cpudata *cpudata) { - u64 epp; + u64 value; int ret; - if (boot_cpu_has(X86_FEATURE_CPPC)) { - if (!cppc_req_cached) { - epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - &cppc_req_cached); - if (epp) - return epp; - } - epp = (cppc_req_cached >> 24) & 0xFF; - } else { - ret = cppc_get_epp_perf(cpudata->cpu, &epp); - if (ret < 0) { - pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return -EIO; - } + ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; } - return (s16)(epp & 0xff); + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); } -static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) +DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); + +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) { - s16 epp; - int index = -EINVAL; + return static_call(amd_pstate_get_epp)(cpudata); +} - epp = amd_pstate_get_epp(cpudata, 0); - if (epp < 0) - return epp; +static u8 shmem_get_epp(struct amd_cpudata *cpudata) +{ + u64 epp; + int ret; - switch (epp) { - case AMD_CPPC_EPP_PERFORMANCE: - index = EPP_INDEX_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_PERFORMANCE: - index = EPP_INDEX_BALANCE_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_POWERSAVE: - index = EPP_INDEX_BALANCE_POWERSAVE; - break; - case AMD_CPPC_EPP_POWERSAVE: - index = EPP_INDEX_POWERSAVE; - break; - default: - break; + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; } - return index; + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); } -static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, + u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { - int ret; - struct cppc_perf_ctrls perf_ctrls; - - if (boot_cpu_has(X86_FEATURE_CPPC)) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + struct amd_cpudata *cpudata = policy->driver_data; + u64 value, prev; + + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - WRITE_ONCE(cpudata->cppc_req_cached, value); + if (value == prev) + return 0; - ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; + if (fast_switch) { + wrmsrq(MSR_AMD_CPPC_REQ, value); + return 0; } else { - perf_ctrls.energy_perf = epp; - ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - if (ret) { - pr_debug("failed to set energy perf value (%d)\n", ret); + int ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + + if (ret) return ret; - } - cpudata->epp_cached = epp; } - return ret; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + return 0; } -static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, - int pref_index) +DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); + +static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, + u8 min_perf, u8 des_perf, + u8 max_perf, u8 epp, + bool fast_switch) { - int epp = -EINVAL; + return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, + max_perf, epp, fast_switch); +} + +static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) +{ + struct amd_cpudata *cpudata = policy->driver_data; + u64 value, prev; int ret; - if (!pref_index) { - pr_debug("EPP pref_index is invalid\n"); - return -EINVAL; + value = prev = READ_ONCE(cpudata->cppc_req_cached); + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + value != prev); } - if (epp == -EINVAL) - epp = epp_values[pref_index]; + if (value == prev) + return 0; - if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("EPP cannot be set under performance policy\n"); - return -EBUSY; + ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (ret) { + pr_err("failed to set energy perf value (%d)\n", ret); + return ret; } - ret = amd_pstate_set_epp(cpudata, epp); + /* update both so that msr_update_perf() can effectively check */ + WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; } -static inline int pstate_enable(bool enable) -{ - int ret, cpu; - unsigned long logical_proc_id_mask = 0; +DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); - if (enable == cppc_enabled) - return 0; +static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) +{ + return static_call(amd_pstate_set_epp)(policy, epp); +} - for_each_present_cpu(cpu) { - unsigned long logical_id = topology_logical_die_id(cpu); +static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) +{ + struct amd_cpudata *cpudata = policy->driver_data; + struct cppc_perf_ctrls perf_ctrls; + u8 epp_cached; + u64 value; + int ret; - if (test_bit(logical_id, &logical_proc_id_mask)) - continue; - set_bit(logical_id, &logical_proc_id_mask); + epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; - ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, - enable); - if (ret) - return ret; + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + epp != epp_cached); } - cppc_enabled = enable; - return 0; -} - -static int cppc_enable(bool enable) -{ - int cpu, ret = 0; - struct cppc_perf_ctrls perf_ctrls; - - if (enable == cppc_enabled) + if (epp == epp_cached) return 0; - for_each_present_cpu(cpu) { - ret = cppc_set_enable(cpu, enable); - if (ret) - return ret; - - /* Enable autonomous mode for EPP */ - if (cppc_state == AMD_PSTATE_ACTIVE) { - /* Set desired perf as zero to allow EPP firmware control */ - perf_ctrls.desired_perf = 0; - ret = cppc_set_perf(cpu, &perf_ctrls); - if (ret) - return ret; - } + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value (%d)\n", ret); + return ret; } - cppc_enabled = enable; + value = READ_ONCE(cpudata->cppc_req_cached); + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + WRITE_ONCE(cpudata->cppc_req_cached, value); + return ret; } -DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); +static inline int msr_cppc_enable(struct cpufreq_policy *policy) +{ + return wrmsrq_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1); +} + +static int shmem_cppc_enable(struct cpufreq_policy *policy) +{ + return cppc_set_enable(policy->cpu, 1); +} + +DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); -static inline int amd_pstate_enable(bool enable) +static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) { - return static_call(amd_pstate_enable)(enable); + return static_call(amd_pstate_cppc_enable)(policy); } -static int pstate_init_perf(struct amd_cpudata *cpudata) +static int msr_init_perf(struct amd_cpudata *cpudata) { - u64 cap1; - u32 highest_perf; + union perf_cached perf = READ_ONCE(cpudata->perf); + u64 cap1, numerator, cppc_req; + u8 min_perf; - int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, + int ret = rdmsrq_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; - /* For platforms that do not support the preferred core feature, the - * highest_pef may be configured with 166 or 255, to avoid max frequency - * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as - * the default max perf. + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req); + if (ret) + return ret; + + WRITE_ONCE(cpudata->cppc_req_cached, cppc_req); + min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req); + + /* + * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an + * indication that the min_perf value is the one specified through the BIOS option */ - if (cpudata->hw_prefcore) - highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; - else - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); - - WRITE_ONCE(cpudata->highest_perf, highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, highest_perf); - WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); - WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); - WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); + cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK); + + if (!cppc_req) + perf.bios_min_perf = min_perf; + + perf.highest_perf = numerator; + perf.max_limit_perf = numerator; + perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); + perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); + perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); + perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); + WRITE_ONCE(cpudata->perf, perf); + WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1)); + return 0; } -static int cppc_init_perf(struct amd_cpudata *cpudata) +static int shmem_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; - u32 highest_perf; + union perf_cached perf = READ_ONCE(cpudata->perf); + u64 numerator; + bool auto_sel; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - if (cpudata->hw_prefcore) - highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; - else - highest_perf = cppc_perf.highest_perf; - - WRITE_ONCE(cpudata->highest_perf, highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, highest_perf); - WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, - cppc_perf.lowest_nonlinear_perf); - WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + + perf.highest_perf = numerator; + perf.max_limit_perf = numerator; + perf.min_limit_perf = cppc_perf.lowest_perf; + perf.nominal_perf = cppc_perf.nominal_perf; + perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + perf.lowest_perf = cppc_perf.lowest_perf; + WRITE_ONCE(cpudata->perf, perf); WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); - WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) return 0; - ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf); + ret = cppc_get_auto_sel(cpudata->cpu, &auto_sel); if (ret) { pr_warn("failed to get auto_sel, ret: %d\n", ret); return 0; @@ -361,44 +471,63 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) return ret; } -DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); +DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf); static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) { return static_call(amd_pstate_init_perf)(cpudata); } -static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) -{ - if (fast_switch) - wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); - else - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); -} - -static void cppc_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) +static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, + u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u64 value, prev; + int ret; + + if (cppc_state == AMD_PSTATE_ACTIVE) { + int ret = shmem_set_epp(policy, epp); + + if (ret) + return ret; + } + + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } + + if (value == prev) + return 0; perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); -} + ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); + if (ret) + return ret; -DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); + WRITE_ONCE(cpudata->cppc_req_cached, value); -static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) -{ - static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + return 0; } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) @@ -407,8 +536,8 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) unsigned long flags; local_irq_save(flags); - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); + rdmsrq(MSR_IA32_APERF, aperf); + rdmsrq(MSR_IA32_MPERF, mperf); tsc = rdtsc(); if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { @@ -434,99 +563,106 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) return true; } -static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) +static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, + u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) { - u64 prev = READ_ONCE(cpudata->cppc_req_cached); - u64 value = prev; + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); + union perf_cached perf = READ_ONCE(cpudata->perf); + + if (!policy) + return; + + /* limit the max perf when core performance boost feature is disabled */ + if (!cpudata->boost_supported) + max_perf = min_t(u8, perf.nominal_perf, max_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + des_perf = clamp_t(u8, des_perf, min_perf, max_perf); + + policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf); if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; des_perf = 0; } - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(des_perf); - - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); - if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, - cpudata->cpu, (value != prev), fast_switch); + cpudata->cpu, fast_switch); } - if (value == prev) - return; - - WRITE_ONCE(cpudata->cppc_req_cached, value); - - amd_pstate_update_perf(cpudata, min_perf, des_perf, - max_perf, fast_switch); + amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch); } -static int amd_pstate_verify(struct cpufreq_policy_data *policy) +static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) { - cpufreq_verify_within_cpu_limits(policy); + /* + * Initialize lower frequency limit (i.e.policy->min) with + * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS, + * Override the initial value set by cpufreq core and amd-pstate qos_requests. + */ + if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = + cpufreq_cpu_get(policy_data->cpu); + struct amd_cpudata *cpudata; + union perf_cached perf; + + if (!policy) + return -EINVAL; + + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); + + if (perf.bios_min_perf) + policy_data->min = perf_to_freq(perf, cpudata->nominal_freq, + perf.bios_min_perf); + else + policy_data->min = cpudata->lowest_nonlinear_freq; + } + + cpufreq_verify_within_cpu_limits(policy_data); return 0; } -static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) +static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, lowest_perf; struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); - max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); - min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); - - lowest_perf = READ_ONCE(cpudata->lowest_perf); - if (min_limit_perf < lowest_perf) - min_limit_perf = lowest_perf; - - if (max_limit_perf < min_limit_perf) - max_limit_perf = min_limit_perf; - - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); WRITE_ONCE(cpudata->max_limit_freq, policy->max); - WRITE_ONCE(cpudata->min_limit_freq, policy->min); - return 0; + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { + perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); + WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); + } else { + perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + } + + WRITE_ONCE(cpudata->perf, perf); } static int amd_pstate_update_freq(struct cpufreq_policy *policy, unsigned int target_freq, bool fast_switch) { struct cpufreq_freqs freqs; - struct amd_cpudata *cpudata = policy->driver_data; - unsigned long max_perf, min_perf, des_perf, cap_perf; + struct amd_cpudata *cpudata; + union perf_cached perf; + u8 des_perf; - if (!cpudata->max_freq) - return -ENODEV; + cpudata = policy->driver_data; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - cap_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); - max_perf = cap_perf; + perf = READ_ONCE(cpudata->perf); freqs.old = policy->cur; freqs.new = target_freq; - des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, - cpudata->max_freq); + des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq); WARN_ON(fast_switch && !policy->fast_switch_enabled); /* @@ -537,8 +673,9 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, if (!fast_switch) cpufreq_freq_transition_begin(policy, &freqs); - amd_pstate_update(cpudata, min_perf, des_perf, - max_perf, fast_switch, policy->governor->flags); + amd_pstate_update(cpudata, perf.min_limit_perf, des_perf, + perf.max_limit_perf, fast_switch, + policy->governor->flags); if (!fast_switch) cpufreq_freq_transition_end(policy, &freqs, false); @@ -566,273 +703,264 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long target_perf, unsigned long capacity) { - unsigned long max_perf, min_perf, des_perf, - cap_perf, lowest_nonlinear_perf, max_freq; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; - unsigned int target_freq; + u8 max_perf, min_perf, des_perf, cap_perf; + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata; + union perf_cached perf; + + if (!policy) + return; + + cpudata = policy->driver_data; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - - cap_perf = READ_ONCE(cpudata->highest_perf); - lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - max_freq = READ_ONCE(cpudata->max_freq); + perf = READ_ONCE(cpudata->perf); + cap_perf = perf.highest_perf; des_perf = cap_perf; if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); + else + min_perf = cap_perf; - if (min_perf < lowest_nonlinear_perf) - min_perf = lowest_nonlinear_perf; + if (min_perf < perf.min_limit_perf) + min_perf = perf.min_limit_perf; - max_perf = cap_perf; + max_perf = perf.max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - target_freq = div_u64(des_perf * max_freq, max_perf); - policy->cur = target_freq; - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); - cpufreq_cpu_put(policy); } -static int amd_get_min_freq(struct amd_cpudata *cpudata) +static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) { - struct cppc_perf_caps cppc_perf; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - - /* Switch to khz */ - return cppc_perf.lowest_freq * 1000; -} - -static int amd_get_max_freq(struct amd_cpudata *cpudata) -{ - struct cppc_perf_caps cppc_perf; - u32 max_perf, max_freq, nominal_freq, nominal_perf; - u64 boost_ratio; + struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + u32 nominal_freq, max_freq; + int ret = 0; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + nominal_freq = READ_ONCE(cpudata->nominal_freq); + max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); - max_perf = READ_ONCE(cpudata->highest_perf); + if (on) + policy->cpuinfo.max_freq = max_freq; + else if (policy->cpuinfo.max_freq > nominal_freq) + policy->cpuinfo.max_freq = nominal_freq; - boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); + policy->max = policy->cpuinfo.max_freq; - max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; + if (cppc_state == AMD_PSTATE_PASSIVE) { + ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); + if (ret < 0) + pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu); + } - /* Switch to khz */ - return max_freq * 1000; + return ret < 0 ? ret : 0; } -static int amd_get_nominal_freq(struct amd_cpudata *cpudata) +static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) { - struct cppc_perf_caps cppc_perf; + struct amd_cpudata *cpudata = policy->driver_data; + int ret; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + if (!cpudata->boost_supported) { + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -EOPNOTSUPP; + } + + ret = amd_pstate_cpu_boost_update(policy, state); + refresh_frequency_limits(policy); - /* Switch to khz */ - return cppc_perf.nominal_freq * 1000; + return ret; } -static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) +static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - u32 lowest_nonlinear_freq, lowest_nonlinear_perf, - nominal_freq, nominal_perf; - u64 lowest_nonlinear_ratio; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + u64 boost_val; + int ret = -1; - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); + /* + * If platform has no CPB support or disable it, initialize current driver + * boost_enabled state to be false, it is not an error for cpufreq core to handle. + */ + if (!cpu_feature_enabled(X86_FEATURE_CPB)) { + pr_debug_once("Boost CPB capabilities not present in the processor\n"); + ret = 0; + goto exit_err; + } - lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + ret = rdmsrq_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); + if (ret) { + pr_err_once("failed to read initial CPU boost state!\n"); + ret = -EIO; + goto exit_err; + } - lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); + if (!(boost_val & MSR_K7_HWCR_CPB_DIS)) + cpudata->boost_supported = true; - lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; + return 0; - /* Switch to khz */ - return lowest_nonlinear_freq * 1000; +exit_err: + cpudata->boost_supported = false; + return ret; } -static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) +static void amd_perf_ctl_reset(unsigned int cpu) { - struct amd_cpudata *cpudata = policy->driver_data; - int ret; - - if (!cpudata->boost_supported) { - pr_err("Boost mode is not supported by this processor or SBIOS\n"); - return -EINVAL; - } + wrmsrq_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); +} - if (state) - policy->cpuinfo.max_freq = cpudata->max_freq; - else - policy->cpuinfo.max_freq = cpudata->nominal_freq; +#define CPPC_MAX_PERF U8_MAX - policy->max = policy->cpuinfo.max_freq; +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + /* user disabled or not detected */ + if (!amd_pstate_prefcore) + return; - ret = freq_qos_update_request(&cpudata->req[1], - policy->cpuinfo.max_freq); - if (ret < 0) - return ret; + cpudata->hw_prefcore = true; - return 0; + /* Priorities must be initialized before ITMT support can be toggled on. */ + sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); } -static void amd_pstate_boost_init(struct amd_cpudata *cpudata) +static void amd_pstate_update_limits(struct cpufreq_policy *policy) { - u32 highest_perf, nominal_perf; + struct amd_cpudata *cpudata; + u32 prev_high = 0, cur_high = 0; + bool highest_perf_changed = false; + unsigned int cpu = policy->cpu; - highest_perf = READ_ONCE(cpudata->highest_perf); - nominal_perf = READ_ONCE(cpudata->nominal_perf); + if (!amd_pstate_prefcore) + return; - if (highest_perf <= nominal_perf) + if (amd_get_highest_perf(cpu, &cur_high)) return; - cpudata->boost_supported = true; - current_pstate_driver->boost_enabled = true; -} + cpudata = policy->driver_data; -static void amd_perf_ctl_reset(unsigned int cpu) -{ - wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); + prev_high = READ_ONCE(cpudata->prefcore_ranking); + highest_perf_changed = (prev_high != cur_high); + if (highest_perf_changed) { + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) { + sched_set_itmt_core_prio((int)cur_high, cpu); + sched_update_asym_prefer_cpu(cpu, prev_high, cur_high); + } + } } /* - * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks - * due to locking, so queue the work for later. + * Get pstate transition delay time from ACPI tables that firmware set + * instead of using hardcode value directly. */ -static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) { - sched_set_itmt_support(); + u32 transition_delay_ns; + + transition_delay_ns = cppc_get_transition_latency(cpu); + if (transition_delay_ns == CPUFREQ_ETERNAL) { + if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC)) + return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; + else + return AMD_PSTATE_TRANSITION_DELAY; + } + + return transition_delay_ns / NSEC_PER_USEC; } -static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); /* - * Get the highest performance register value. - * @cpu: CPU from which to get highest performance. - * @highest_perf: Return address. - * - * Return: 0 for success, -EIO otherwise. + * Get pstate transition latency value from ACPI tables that firmware + * set instead of using hardcode value directly. */ -static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +static u32 amd_pstate_get_transition_latency(unsigned int cpu) { - int ret; + u32 transition_latency; - if (boot_cpu_has(X86_FEATURE_CPPC)) { - u64 cap1; + transition_latency = cppc_get_transition_latency(cpu); + if (transition_latency == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_LATENCY; - ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); - if (ret) - return ret; - WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); - } else { - u64 cppc_highest_perf; - - ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); - if (ret) - return ret; - WRITE_ONCE(*highest_perf, cppc_highest_perf); - } - - return (ret); + return transition_latency; } -#define CPPC_MAX_PERF U8_MAX - -static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +/* + * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq + * for the @cpudata object. + * + * Requires: all perf members of @cpudata to be initialized. + * + * Returns 0 on success, non-zero value on failure. + */ +static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { - int ret, prio; - u32 highest_perf; + u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq; + struct cppc_perf_caps cppc_perf; + union perf_cached perf; + int ret; - ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) - return; - - cpudata->hw_prefcore = true; - /* check if CPPC preferred core feature is enabled*/ - if (highest_perf < CPPC_MAX_PERF) - prio = (int)highest_perf; - else { - pr_debug("AMD CPPC preferred core is unsupported!\n"); - cpudata->hw_prefcore = false; - return; - } - - if (!amd_pstate_prefcore) - return; - - /* - * The priorities can be set regardless of whether or not - * sched_set_itmt_support(true) has been called and it is valid to - * update them at any time after it has been called. - */ - sched_set_itmt_core_prio(prio, cpudata->cpu); + return ret; + perf = READ_ONCE(cpudata->perf); - schedule_work(&sched_prefcore_work); -} + if (quirks && quirks->nominal_freq) + nominal_freq = quirks->nominal_freq; + else + nominal_freq = cppc_perf.nominal_freq; + nominal_freq *= 1000; -static void amd_pstate_update_limits(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; - u32 prev_high = 0, cur_high = 0; - int ret; - bool highest_perf_changed = false; + if (quirks && quirks->lowest_freq) { + min_freq = quirks->lowest_freq; + perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq); + WRITE_ONCE(cpudata->perf, perf); + } else + min_freq = cppc_perf.lowest_freq; - mutex_lock(&amd_pstate_driver_lock); - if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) - goto free_cpufreq_put; + min_freq *= 1000; - ret = amd_pstate_get_highest_perf(cpu, &cur_high); - if (ret) - goto free_cpufreq_put; + WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - prev_high = READ_ONCE(cpudata->prefcore_ranking); - if (prev_high != cur_high) { - highest_perf_changed = true; - WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf); + lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); - if (cur_high < CPPC_MAX_PERF) - sched_set_itmt_core_prio((int)cur_high, cpu); + /** + * Below values need to be initialized correctly, otherwise driver will fail to load + * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf + * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] + * Check _CPC in ACPI table objects if any values are incorrect + */ + if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { + pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", + min_freq, max_freq, nominal_freq); + return -EINVAL; } -free_cpufreq_put: - cpufreq_cpu_put(policy); - - if (!highest_perf_changed) - cpufreq_update_policy(cpu); + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { + pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", + lowest_nonlinear_freq, min_freq, nominal_freq); + return -EINVAL; + } - mutex_unlock(&amd_pstate_driver_lock); + return 0; } static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; - struct device *dev; struct amd_cpudata *cpudata; + union perf_cached perf; + struct device *dev; + int ret; /* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, @@ -849,41 +977,46 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; - amd_pstate_init_prefcore(cpudata); - ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + amd_pstate_init_prefcore(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); - ret = -EINVAL; + ret = amd_pstate_init_freq(cpudata); + if (ret) goto free_cpudata1; - } - policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; - policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + ret = amd_pstate_init_boost_support(cpudata); + if (ret) + goto free_cpudata1; + + policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); + policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); + + perf = READ_ONCE(cpudata->perf); - policy->min = min_freq; - policy->max = max_freq; + policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, + cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, + cpudata->nominal_freq, + perf.highest_perf); - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; + + policy->boost_supported = READ_ONCE(cpudata->boost_supported); /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - if (boot_cpu_has(X86_FEATURE_CPPC)) + if (cpu_feature_enabled(X86_FEATURE_CPPC)) policy->fast_switch_possible = true; ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], - FREQ_QOS_MIN, policy->cpuinfo.min_freq); + FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); goto free_cpudata1; @@ -896,17 +1029,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata2; } - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; - cpudata->max_limit_freq = max_freq; - cpudata->min_limit_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; - policy->driver_data = cpudata; - amd_pstate_boost_init(cpudata); if (!current_pstate_driver->adjust_perf) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; @@ -915,42 +1039,23 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) free_cpudata2: freq_qos_remove_request(&cpudata->req[0]); free_cpudata1: + pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); kfree(cpudata); return ret; } -static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) +static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + + /* Reset CPPC_REQ MSR to the BIOS value */ + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); freq_qos_remove_request(&cpudata->req[1]); freq_qos_remove_request(&cpudata->req[0]); policy->fast_switch_possible = false; kfree(cpudata); - - return 0; -} - -static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_enable(true); - if (ret) - pr_err("failed to enable amd-pstate during resume, return %d\n", ret); - - return ret; -} - -static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_enable(false); - if (ret) - pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); - - return ret; } /* Sysfs attributes */ @@ -963,27 +1068,27 @@ static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf) { - int max_freq; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; - max_freq = amd_get_max_freq(cpudata); - if (max_freq < 0) - return max_freq; + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); - return sysfs_emit(buf, "%u\n", max_freq); + return sysfs_emit(buf, "%u\n", + perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf)); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, char *buf) { - int freq; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; - freq = amd_get_lowest_nonlinear_freq(cpudata); - if (freq < 0) - return freq; + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); - return sysfs_emit(buf, "%u\n", freq); + return sysfs_emit(buf, "%u\n", + perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf)); } /* @@ -993,18 +1098,17 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, char *buf) { - u32 perf; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; - perf = READ_ONCE(cpudata->highest_perf); + cpudata = policy->driver_data; - return sysfs_emit(buf, "%u\n", perf); + return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf); } static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, char *buf) { - u32 perf; + u8 perf; struct amd_cpudata *cpudata = policy->driver_data; perf = READ_ONCE(cpudata->prefcore_ranking); @@ -1048,6 +1152,7 @@ static ssize_t store_energy_performance_preference( struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; + u8 epp; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) @@ -1057,50 +1162,100 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - mutex_lock(&amd_pstate_limits_lock); - ret = amd_pstate_set_energy_pref_index(cpudata, ret); - mutex_unlock(&amd_pstate_limits_lock); + if (!ret) + epp = cpudata->epp_default; + else + epp = epp_values[ret]; + + if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set under performance policy\n"); + return -EBUSY; + } + + ret = amd_pstate_set_epp(policy, epp); - return ret ?: count; + return ret ? ret : count; } static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct amd_cpudata *cpudata = policy->driver_data; - int preference; + u8 preference, epp; + + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - preference = amd_pstate_get_energy_pref_index(cpudata); - if (preference < 0) - return preference; + switch (epp) { + case AMD_CPPC_EPP_PERFORMANCE: + preference = EPP_INDEX_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_PERFORMANCE: + preference = EPP_INDEX_BALANCE_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_POWERSAVE: + preference = EPP_INDEX_BALANCE_POWERSAVE; + break; + case AMD_CPPC_EPP_POWERSAVE: + preference = EPP_INDEX_POWERSAVE; + break; + default: + return -EINVAL; + } return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); } static void amd_pstate_driver_cleanup(void) { - amd_pstate_enable(false); + if (amd_pstate_prefcore) + sched_clear_itmt_support(); + cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } +static int amd_pstate_set_driver(int mode_idx) +{ + if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { + cppc_state = mode_idx; + if (cppc_state == AMD_PSTATE_DISABLE) + pr_info("driver is explicitly disabled\n"); + + if (cppc_state == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) + current_pstate_driver = &amd_pstate_driver; + + return 0; + } + + return -EINVAL; +} + static int amd_pstate_register_driver(int mode) { int ret; - if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED) - current_pstate_driver = &amd_pstate_driver; - else if (mode == AMD_PSTATE_ACTIVE) - current_pstate_driver = &amd_pstate_epp_driver; - else - return -EINVAL; + ret = amd_pstate_set_driver(mode); + if (ret) + return ret; cppc_state = mode; + + /* at least one CPU supports CPB */ + current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); + ret = cpufreq_register_driver(current_pstate_driver); if (ret) { amd_pstate_driver_cleanup(); return ret; } + + /* Enable ITMT support once all CPUs have initialized their asym priorities. */ + if (amd_pstate_prefcore) + sched_set_itmt_support(); + return 0; } @@ -1117,7 +1272,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode) cppc_state = mode; - if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) + if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) return 0; for_each_present_cpu(cpu) { @@ -1177,7 +1332,13 @@ static ssize_t amd_pstate_show_status(char *buf) return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); } -static int amd_pstate_update_status(const char *buf, size_t size) +int amd_pstate_get_status(void) +{ + return cppc_state; +} +EXPORT_SYMBOL_GPL(amd_pstate_get_status); + +int amd_pstate_update_status(const char *buf, size_t size) { int mode_idx; @@ -1189,22 +1350,22 @@ static int amd_pstate_update_status(const char *buf, size_t size) if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) return -EINVAL; - if (mode_state_machine[cppc_state][mode_idx]) + if (mode_state_machine[cppc_state][mode_idx]) { + guard(mutex)(&amd_pstate_driver_lock); return mode_state_machine[cppc_state][mode_idx](mode_idx); + } return 0; } +EXPORT_SYMBOL_GPL(amd_pstate_update_status); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { - ssize_t ret; - mutex_lock(&amd_pstate_driver_lock); - ret = amd_pstate_show_status(buf); - mutex_unlock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); - return ret; + return amd_pstate_show_status(buf); } static ssize_t status_store(struct device *a, struct device_attribute *b, @@ -1213,9 +1374,7 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, char *p = memchr(buf, '\n', count); int ret; - mutex_lock(&amd_pstate_driver_lock); ret = amd_pstate_update_status(buf, p ? p - buf : count); - mutex_unlock(&amd_pstate_driver_lock); return ret < 0 ? ret : count; } @@ -1290,10 +1449,10 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; struct amd_cpudata *cpudata; + union perf_cached perf; struct device *dev; - u64 value; + int ret; /* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, @@ -1309,267 +1468,179 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; cpudata->cpu = policy->cpu; - cpudata->epp_policy = 0; - - amd_pstate_init_prefcore(cpudata); ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); - ret = -EINVAL; + amd_pstate_init_prefcore(cpudata); + + ret = amd_pstate_init_freq(cpudata); + if (ret) goto free_cpudata1; - } - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; - /* It will be updated by governor */ - policy->cur = policy->cpuinfo.min_freq; + ret = amd_pstate_init_boost_support(cpudata); + if (ret) + goto free_cpudata1; - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; + perf = READ_ONCE(cpudata->perf); + policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, + cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, + cpudata->nominal_freq, + perf.highest_perf); policy->driver_data = cpudata; - cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; + + /* It will be updated by governor */ + policy->cur = policy->cpuinfo.min_freq; - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; + + policy->boost_supported = READ_ONCE(cpudata->boost_supported); /* * Set the policy to provide a valid fallback value in case * the default cpufreq governor is neither powersave nor performance. */ if (amd_pstate_acpi_pm_profile_server() || - amd_pstate_acpi_pm_profile_undefined()) + amd_pstate_acpi_pm_profile_undefined()) { policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else + cpudata->epp_default = amd_pstate_get_epp(cpudata); + } else { policy->policy = CPUFREQ_POLICY_POWERSAVE; + cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; + } - if (boot_cpu_has(X86_FEATURE_CPPC)) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_req_cached, value); + ret = amd_pstate_set_epp(policy, cpudata->epp_default); + if (ret) + return ret; - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_cap1_cached, value); - } - amd_pstate_boost_init(cpudata); + current_pstate_driver->adjust_perf = NULL; return 0; free_cpudata1: + pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); kfree(cpudata); return ret; } -static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) -{ - pr_debug("CPU %d exiting\n", policy->cpu); - return 0; -} - -static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) +static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf, min_limit_perf, max_limit_perf; - u64 value; - s16 epp; - - max_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); - max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); - min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); - - if (min_limit_perf < min_perf) - min_limit_perf = min_perf; - - if (max_limit_perf < min_limit_perf) - max_limit_perf = min_limit_perf; - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); - - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - value = READ_ONCE(cpudata->cppc_req_cached); - - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_perf = max_perf; + if (cpudata) { + union perf_cached perf = READ_ONCE(cpudata->perf); - /* Initial min/max values for CPPC Performance Controls Register */ - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); + /* Reset CPPC_REQ MSR to the BIOS value */ + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); + kfree(cpudata); + policy->driver_data = NULL; + } - /* CPPC EPP feature require to set zero to the desire perf bit */ - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(0); + pr_debug("CPU %d exiting\n", policy->cpu); +} - cpudata->epp_policy = cpudata->policy; +static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf; + u8 epp; - /* Get BIOS pre-defined epp value */ - epp = amd_pstate_get_epp(cpudata, value); - if (epp < 0) { - /** - * This return value can only be negative for shared_memory - * systems where EPP register read/write not supported. - */ - return; - } + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) epp = 0; + else + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - /* Set initial EPP value */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - } + perf = READ_ONCE(cpudata->perf); - WRITE_ONCE(cpudata->cppc_req_cached, value); - amd_pstate_set_epp(cpudata, epp); + return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U, + perf.max_limit_perf, epp, false); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + int ret; if (!policy->cpuinfo.max_freq) return -ENODEV; - pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", - policy->cpuinfo.max_freq, policy->max); - cpudata->policy = policy->policy; - amd_pstate_epp_update_limit(policy); - - return 0; -} - -static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) -{ - struct cppc_perf_ctrls perf_ctrls; - u64 value, max_perf; - int ret; - - ret = amd_pstate_enable(true); + ret = amd_pstate_epp_update_limit(policy); if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); + return ret; - value = READ_ONCE(cpudata->cppc_req_cached); - max_perf = READ_ONCE(cpudata->highest_perf); + /* + * policy->cur is never updated with the amd_pstate_epp driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; - if (boot_cpu_has(X86_FEATURE_CPPC)) { - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); - cppc_set_perf(cpudata->cpu, &perf_ctrls); - } + return 0; } -static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) +static int amd_pstate_cpu_online(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - - pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - - if (cppc_state == AMD_PSTATE_ACTIVE) { - amd_pstate_epp_reenable(cpudata); - cpudata->suspended = false; - } - - return 0; + return amd_pstate_cppc_enable(policy); } -static void amd_pstate_epp_offline(struct cpufreq_policy *policy) +static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; - int min_perf; - u64 value; + union perf_cached perf = READ_ONCE(cpudata->perf); - min_perf = READ_ONCE(cpudata->lowest_perf); - value = READ_ONCE(cpudata->cppc_req_cached); - - mutex_lock(&amd_pstate_limits_lock); - if (boot_cpu_has(X86_FEATURE_CPPC)) { - cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; - - /* Set max perf same as min perf */ - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(min_perf); - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.desired_perf = 0; - perf_ctrls.max_perf = min_perf; - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); - cppc_set_perf(cpudata->cpu, &perf_ctrls); - } - mutex_unlock(&amd_pstate_limits_lock); + /* + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified + * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the + * limits, epp and desired perf will get reset to the cached values in cpudata struct + */ + return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); } -static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) +static int amd_pstate_suspend(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + int ret; - pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); - - if (cpudata->suspended) - return 0; + /* + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified + * min_perf value across kexec reboots. If this CPU is just resumed back without kexec, + * the limits, epp and desired perf will get reset to the cached values in cpudata struct + */ + ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + if (ret) + return ret; - if (cppc_state == AMD_PSTATE_ACTIVE) - amd_pstate_epp_offline(policy); + /* invalidate to ensure it's rewritten during resume */ + cpudata->cppc_req_cached = 0; - return 0; -} + /* set this flag to avoid setting core offline*/ + cpudata->suspended = true; -static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) -{ - cpufreq_verify_within_cpu_limits(policy); - pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); return 0; } -static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +static int amd_pstate_resume(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - int ret; - - /* avoid suspending when EPP is not enabled */ - if (cppc_state != AMD_PSTATE_ACTIVE) - return 0; + union perf_cached perf = READ_ONCE(cpudata->perf); + int cur_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->cur); - /* set this flag to avoid setting core offline*/ - cpudata->suspended = true; - - /* disable CPPC in lowlevel firmware */ - ret = amd_pstate_enable(false); - if (ret) - pr_err("failed to suspend, return %d\n", ret); - - return 0; + /* Set CPPC_REQ to last sane value until the governor updates it */ + return amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf, + 0U, false); } static int amd_pstate_epp_resume(struct cpufreq_policy *policy) @@ -1577,12 +1648,12 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - mutex_lock(&amd_pstate_limits_lock); + int ret; /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(cpudata); - - mutex_unlock(&amd_pstate_limits_lock); + ret = amd_pstate_epp_update_limit(policy); + if (ret) + return ret; cpudata->suspended = false; } @@ -1597,8 +1668,10 @@ static struct cpufreq_driver amd_pstate_driver = { .fast_switch = amd_pstate_fast_switch, .init = amd_pstate_cpu_init, .exit = amd_pstate_cpu_exit, - .suspend = amd_pstate_cpu_suspend, - .resume = amd_pstate_cpu_resume, + .online = amd_pstate_cpu_online, + .offline = amd_pstate_cpu_offline, + .suspend = amd_pstate_suspend, + .resume = amd_pstate_resume, .set_boost = amd_pstate_set_boost, .update_limits = amd_pstate_update_limits, .name = "amd-pstate", @@ -1607,36 +1680,72 @@ static struct cpufreq_driver amd_pstate_driver = { static struct cpufreq_driver amd_pstate_epp_driver = { .flags = CPUFREQ_CONST_LOOPS, - .verify = amd_pstate_epp_verify_policy, + .verify = amd_pstate_verify, .setpolicy = amd_pstate_epp_set_policy, .init = amd_pstate_epp_cpu_init, .exit = amd_pstate_epp_cpu_exit, - .offline = amd_pstate_epp_cpu_offline, - .online = amd_pstate_epp_cpu_online, - .suspend = amd_pstate_epp_suspend, + .offline = amd_pstate_cpu_offline, + .online = amd_pstate_cpu_online, + .suspend = amd_pstate_suspend, .resume = amd_pstate_epp_resume, .update_limits = amd_pstate_update_limits, + .set_boost = amd_pstate_set_boost, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; -static int __init amd_pstate_set_driver(int mode_idx) +/* + * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. + * show the debug message that helps to check if the CPU has CPPC support for loading issue. + */ +static bool amd_cppc_supported(void) { - if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { - cppc_state = mode_idx; - if (cppc_state == AMD_PSTATE_DISABLE) - pr_info("driver is explicitly disabled\n"); + struct cpuinfo_x86 *c = &cpu_data(0); + bool warn = false; - if (cppc_state == AMD_PSTATE_ACTIVE) - current_pstate_driver = &amd_pstate_epp_driver; - - if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) - current_pstate_driver = &amd_pstate_driver; + if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { + pr_debug_once("CPPC feature is not supported by the processor\n"); + return false; + } - return 0; + /* + * If the CPPC feature is disabled in the BIOS for processors + * that support MSR-based CPPC, the AMD Pstate driver may not + * function correctly. + * + * For such processors, check the CPPC flag and display a + * warning message if the platform supports CPPC. + * + * Note: The code check below will not abort the driver + * registration process because of the code is added for + * debugging purposes. Besides, it may still be possible for + * the driver to work using the shared-memory mechanism. + */ + if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { + switch (c->x86_model) { + case 0x60 ... 0x6F: + case 0x80 ... 0xAF: + warn = true; + break; + } + } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || + cpu_feature_enabled(X86_FEATURE_ZEN4)) { + switch (c->x86_model) { + case 0x10 ... 0x1F: + case 0x40 ... 0xAF: + warn = true; + break; + } + } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { + warn = true; + } } - return -EINVAL; + if (warn) + pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" + "Please enable it if your BIOS has the CPPC option.\n"); + return true; } static int __init amd_pstate_init(void) @@ -1647,6 +1756,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* show debug message only if CPPC is not supported */ + if (!amd_cppc_supported()) + return -EOPNOTSUPP; + + /* show warning message when BIOS broken or ACPI disabled */ if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); return -ENODEV; @@ -1656,55 +1770,59 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; - switch (cppc_state) { - case AMD_PSTATE_UNDEFINED: + quirks = NULL; + + /* check if this machine need CPPC quirks */ + dmi_check_system(amd_pstate_quirks_table); + + /* + * determine the driver mode from the command line or kernel config. + * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. + * command line options will override the kernel config settings. + */ + + if (cppc_state == AMD_PSTATE_UNDEFINED) { /* Disable on the following configs by default: * 1. Undefined platforms - * 2. Server platforms - * 3. Shared memory designs + * 2. Server platforms with CPUs older than Family 0x1A. */ if (amd_pstate_acpi_pm_profile_undefined() || - amd_pstate_acpi_pm_profile_server() || - !boot_cpu_has(X86_FEATURE_CPPC)) { + (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) { pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } - ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); - if (ret) - return ret; - break; - case AMD_PSTATE_DISABLE: + /* get driver mode from kernel config option [1:4] */ + cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; + } + + if (cppc_state == AMD_PSTATE_DISABLE) { + pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; - case AMD_PSTATE_PASSIVE: - case AMD_PSTATE_ACTIVE: - case AMD_PSTATE_GUIDED: - break; - default: - return -EINVAL; } /* capability check */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); - if (cppc_state != AMD_PSTATE_ACTIVE) - current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; } else { pr_debug("AMD CPPC shared memory based functionality is supported\n"); - static_call_update(amd_pstate_enable, cppc_enable); - static_call_update(amd_pstate_init_perf, cppc_init_perf); - static_call_update(amd_pstate_update_perf, cppc_update_perf); + static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); + static_call_update(amd_pstate_init_perf, shmem_init_perf); + static_call_update(amd_pstate_update_perf, shmem_update_perf); + static_call_update(amd_pstate_get_epp, shmem_get_epp); + static_call_update(amd_pstate_set_epp, shmem_set_epp); } - /* enable amd pstate feature */ - ret = amd_pstate_enable(true); - if (ret) { - pr_err("failed to enable with return %d\n", ret); - return ret; + if (amd_pstate_prefcore) { + ret = amd_detect_prefcore(&amd_pstate_prefcore); + if (ret) + return ret; } - ret = cpufreq_register_driver(current_pstate_driver); - if (ret) + ret = amd_pstate_register_driver(cppc_state); + if (ret) { pr_err("failed to register with return %d\n", ret); + return ret; + } dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { |