Diffstat (limited to 'drivers/cpufreq/amd-pstate.c')
-rw-r--r-- | drivers/cpufreq/amd-pstate.c | 1247
1 file changed, 788 insertions, 459 deletions
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1791d37fbc53..313550fa62d4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/bitfield.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -36,7 +37,7 @@ #include <linux/delay.h> #include <linux/uaccess.h> #include <linux/static_call.h> -#include <linux/amd-pstate.h> +#include <linux/topology.h> #include <acpi/processor.h> #include <acpi/cppc_acpi.h> @@ -45,25 +46,53 @@ #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> + +#include "amd-pstate.h" #include "amd-pstate-trace.h" #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 + +#define AMD_CPPC_EPP_PERFORMANCE 0x00 +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF +#define AMD_CPPC_EPP_POWERSAVE 0xFF + +static const char * const amd_pstate_mode_string[] = { + [AMD_PSTATE_UNDEFINED] = "undefined", + [AMD_PSTATE_DISABLE] = "disable", + [AMD_PSTATE_PASSIVE] = "passive", + [AMD_PSTATE_ACTIVE] = "active", + [AMD_PSTATE_GUIDED] = "guided", + NULL, +}; + +const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode) +{ + if (mode < 0 || mode >= AMD_PSTATE_MAX) + return NULL; + return amd_pstate_mode_string[mode]; +} +EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string); + +struct quirk_entry { + u32 nominal_freq; + u32 lowest_freq; +}; -/* - * TODO: We need more time to fine tune processors with shared memory solution - * with community together. - * - * There are some performance drops on the CPU benchmarks which reports from - * Suse. We are co-working with them to fine tune the shared memory solution. So - * we disable it by default to go acpi-cpufreq on these processors and add a - * module parameter to be able to enable it manually for debugging. 
- */ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; +static struct quirk_entry *quirks; + +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* * AMD Energy Preference Performance (EPP) @@ -108,6 +137,41 @@ static unsigned int epp_values[] = { typedef int (*cppc_mode_transition_fn)(int); +static struct quirk_entry quirk_amd_7k62 = { + .nominal_freq = 2600, + .lowest_freq = 550, +}; + +static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) +{ + /** + * match the broken bios for family 17h processor support CPPC V2 + * broken BIOS lack of nominal_freq and lowest_freq capabilities + * definition in ACPI tables + */ + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { + quirks = dmi->driver_data; + pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); + return 1; + } + + return 0; +} + +static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { + { + .callback = dmi_matched_7k62_bios_bug, + .ident = "AMD EPYC 7K62", + .matches = { + DMI_MATCH(DMI_BIOS_VERSION, "5.14"), + DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"), + }, + .driver_data = &quirk_amd_7k62, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -122,99 +186,145 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata) +{ + u64 value; + int ret; + + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; + } + + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); +} + +DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); + +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) +{ + return static_call(amd_pstate_get_epp)(cpudata); +} + +static s16 shmem_get_epp(struct amd_cpudata *cpudata) { u64 epp; int ret; - if (boot_cpu_has(X86_FEATURE_CPPC)) { - if (!cppc_req_cached) { - epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - &cppc_req_cached); - if (epp) - return epp; - } - epp = (cppc_req_cached >> 24) & 0xFF; - } else { - ret = cppc_get_epp_perf(cpudata->cpu, &epp); - if (ret < 0) { - pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return -EIO; - } + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; } return (s16)(epp & 0xff); } -static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) +static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { - s16 epp; - int index = -EINVAL; + u64 value, prev; - epp = amd_pstate_get_epp(cpudata, 0); - if (epp < 0) - return epp; + value = prev = READ_ONCE(cpudata->cppc_req_cached); - switch (epp) { - case AMD_CPPC_EPP_PERFORMANCE: - index = EPP_INDEX_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_PERFORMANCE: - index = EPP_INDEX_BALANCE_PERFORMANCE; - break; - case 
AMD_CPPC_EPP_BALANCE_POWERSAVE: - index = EPP_INDEX_BALANCE_POWERSAVE; - break; - case AMD_CPPC_EPP_POWERSAVE: - index = EPP_INDEX_POWERSAVE; - break; - default: - break; + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (value == prev) + return 0; + + if (fast_switch) { + wrmsrl(MSR_AMD_CPPC_REQ, value); + return 0; + } else { + int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + + if (ret) + return ret; } - return index; + WRITE_ONCE(cpudata->cppc_req_cached, value); + WRITE_ONCE(cpudata->epp_cached, epp); + + return 0; } -static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); + +static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, + u32 max_perf, u32 epp, + bool fast_switch) { + return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + max_perf, epp, fast_switch); +} + +static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + u64 value, prev; int ret; - struct cppc_perf_ctrls perf_ctrls; - if (boot_cpu_has(X86_FEATURE_CPPC)) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + value = prev = READ_ONCE(cpudata->cppc_req_cached); + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - WRITE_ONCE(cpudata->cppc_req_cached, value); + if (value == prev) + return 0; - ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; - } else { - perf_ctrls.energy_perf = epp; - ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - if (ret) { - pr_debug("failed to set energy perf value (%d)\n", ret); - return ret; - } - cpudata->epp_cached = epp; + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (ret) { + pr_err("failed to set energy perf value (%d)\n", ret); + return ret; } + /* update both so that msr_update_perf() can effectively check */ + WRITE_ONCE(cpudata->epp_cached, epp); + WRITE_ONCE(cpudata->cppc_req_cached, value); + return ret; } -static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, - int pref_index) +DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); + +static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + return static_call(amd_pstate_set_epp)(cpudata, epp); +} + +static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) { - int epp = -EINVAL; int ret; + struct cppc_perf_ctrls perf_ctrls; - if (!pref_index) { - pr_debug("EPP pref_index is invalid\n"); - return -EINVAL; + if (epp == cpudata->epp_cached) + return 0; + + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value (%d)\n", ret); + return ret; } + WRITE_ONCE(cpudata->epp_cached, epp); + + return ret; +} - if (epp == -EINVAL) +static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, + int pref_index) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int epp; + + if (!pref_index) + epp = cpudata->epp_default; + else epp = epp_values[pref_index]; if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -222,21 +332,33 @@ static int 
amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, return -EBUSY; } - ret = amd_pstate_set_epp(cpudata, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), + policy->boost_enabled); + } - return ret; + return amd_pstate_set_epp(cpudata, epp); } -static inline int pstate_enable(bool enable) +static inline int msr_cppc_enable(bool enable) { int ret, cpu; unsigned long logical_proc_id_mask = 0; + /* + * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared. + */ + if (!enable) + return 0; + if (enable == cppc_enabled) return 0; for_each_present_cpu(cpu) { - unsigned long logical_id = topology_logical_die_id(cpu); + unsigned long logical_id = topology_logical_package_id(cpu); if (test_bit(logical_id, &logical_proc_id_mask)) continue; @@ -253,7 +375,7 @@ static inline int pstate_enable(bool enable) return 0; } -static int cppc_enable(bool enable) +static int shmem_cppc_enable(bool enable) { int cpu, ret = 0; struct cppc_perf_ctrls perf_ctrls; @@ -280,60 +402,56 @@ static int cppc_enable(bool enable) return ret; } -DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); +DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); -static inline int amd_pstate_enable(bool enable) +static inline int amd_pstate_cppc_enable(bool enable) { - return static_call(amd_pstate_enable)(enable); + return static_call(amd_pstate_cppc_enable)(enable); } -static int pstate_init_perf(struct amd_cpudata *cpudata) +static int msr_init_perf(struct amd_cpudata *cpudata) { - u64 cap1; - u32 highest_perf; + u64 cap1, numerator; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. 
- */ - highest_perf = amd_get_highest_perf(); - if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; - WRITE_ONCE(cpudata->highest_perf, highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, highest_perf); + WRITE_ONCE(cpudata->highest_perf, numerator); + WRITE_ONCE(cpudata->max_limit_perf, numerator); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } -static int cppc_init_perf(struct amd_cpudata *cpudata) +static int shmem_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; - u32 highest_perf; + u64 numerator; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - highest_perf = amd_get_highest_perf(); - if (highest_perf > cppc_perf.highest_perf) - highest_perf = cppc_perf.highest_perf; + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; - WRITE_ONCE(cpudata->highest_perf, highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, highest_perf); + WRITE_ONCE(cpudata->highest_perf, numerator); + WRITE_ONCE(cpudata->max_limit_perf, numerator); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) @@ -354,44 +472,30 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) return ret; } -DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); +DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf); static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) { return static_call(amd_pstate_init_perf)(cpudata); } -static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) -{ - if (fast_switch) - wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); - else - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); -} - -static void cppc_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; + if (cppc_state == AMD_PSTATE_ACTIVE) { + int ret = shmem_set_epp(cpudata, epp); + + if (ret) + return ret; + } + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); -} - -DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); - -static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) -{ - static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + return cppc_set_perf(cpudata->cpu, &perf_ctrls); } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) @@ -430,58 +534,73 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void 
amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { - u64 prev = READ_ONCE(cpudata->cppc_req_cached); - u64 value = prev; + unsigned long max_freq; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); + u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + max_freq = READ_ONCE(cpudata->max_limit_freq); + policy->cur = div_u64(des_perf * max_freq, max_perf); + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; des_perf = 0; } - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(des_perf); - - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); + /* limit the max perf when core performance boost feature is disabled */ + if (!cpudata->boost_supported) + max_perf = min_t(unsigned long, nominal_perf, max_perf); if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, - cpudata->cpu, (value != prev), fast_switch); + cpudata->cpu, fast_switch); } - if (value == prev) - return; + amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); - WRITE_ONCE(cpudata->cppc_req_cached, value); - - amd_pstate_update_perf(cpudata, min_perf, des_perf, - max_perf, fast_switch); + cpufreq_cpu_put(policy); } -static int amd_pstate_verify(struct cpufreq_policy_data *policy) +static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) { - cpufreq_verify_within_cpu_limits(policy); + /* + * Initialize lower frequency limit (i.e.policy->min) with + * lowest_nonlinear_frequency which is the most energy efficient + * frequency. Override the initial value set by cpufreq core and + * amd-pstate qos_requests. 
+ */ + if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { + struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu); + struct amd_cpudata *cpudata; + + if (!policy) + return -EINVAL; + + cpudata = policy->driver_data; + policy_data->min = cpudata->lowest_nonlinear_freq; + cpufreq_cpu_put(policy); + } + + cpufreq_verify_within_cpu_limits(policy_data); + pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); return 0; } static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf; + u32 max_limit_perf, min_limit_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; - max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); - min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + max_perf = READ_ONCE(cpudata->highest_perf); + max_freq = READ_ONCE(cpudata->max_freq); + max_limit_perf = div_u64(policy->max * max_perf, max_freq); + min_limit_perf = div_u64(policy->min * max_perf, max_freq); + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -553,10 +672,14 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long capacity) { unsigned long max_perf, min_perf, des_perf, - cap_perf, lowest_nonlinear_perf, max_freq; + cap_perf, lowest_nonlinear_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; - unsigned int target_freq; + struct amd_cpudata *cpudata; + + if (!policy) + return; + + cpudata = policy->driver_data; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); @@ -564,151 +687,280 @@ static void amd_pstate_adjust_perf(unsigned int cpu, cap_perf = READ_ONCE(cpudata->highest_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - max_freq = READ_ONCE(cpudata->max_freq); des_perf = cap_perf; if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); if (min_perf < lowest_nonlinear_perf) min_perf = lowest_nonlinear_perf; - max_perf = cap_perf; + max_perf = cpudata->max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - target_freq = div_u64(des_perf * max_freq, max_perf); - policy->cur = target_freq; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); cpufreq_cpu_put(policy); } -static int amd_get_min_freq(struct amd_cpudata *cpudata) +static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) { - struct cppc_perf_caps cppc_perf; + struct amd_cpudata *cpudata = policy->driver_data; + u32 nominal_freq, max_freq; + int ret = 0; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + nominal_freq = READ_ONCE(cpudata->nominal_freq); + max_freq = READ_ONCE(cpudata->max_freq); + + if (on) + policy->cpuinfo.max_freq = max_freq; + else if (policy->cpuinfo.max_freq > nominal_freq) + policy->cpuinfo.max_freq = nominal_freq; + + policy->max = policy->cpuinfo.max_freq; - /* Switch to khz */ - return cppc_perf.lowest_freq * 1000; + if (cppc_state == 
AMD_PSTATE_PASSIVE) { + ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); + if (ret < 0) + pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu); + } + + return ret < 0 ? ret : 0; } -static int amd_get_max_freq(struct amd_cpudata *cpudata) +static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) { - struct cppc_perf_caps cppc_perf; - u32 max_perf, max_freq, nominal_freq, nominal_perf; - u64 boost_ratio; + struct amd_cpudata *cpudata = policy->driver_data; + int ret; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + if (!cpudata->boost_supported) { + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -EOPNOTSUPP; + } + guard(mutex)(&amd_pstate_driver_lock); - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); - max_perf = READ_ONCE(cpudata->highest_perf); + ret = amd_pstate_cpu_boost_update(policy, state); + refresh_frequency_limits(policy); - boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); + return ret; +} - max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; +static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) +{ + u64 boost_val; + int ret = -1; - /* Switch to khz */ - return max_freq * 1000; + /* + * If platform has no CPB support or disable it, initialize current driver + * boost_enabled state to be false, it is not an error for cpufreq core to handle. + */ + if (!cpu_feature_enabled(X86_FEATURE_CPB)) { + pr_debug_once("Boost CPB capabilities not present in the processor\n"); + ret = 0; + goto exit_err; + } + + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); + if (ret) { + pr_err_once("failed to read initial CPU boost state!\n"); + ret = -EIO; + goto exit_err; + } + + if (!(boost_val & MSR_K7_HWCR_CPB_DIS)) + cpudata->boost_supported = true; + + return 0; + +exit_err: + cpudata->boost_supported = false; + return ret; } -static int amd_get_nominal_freq(struct amd_cpudata *cpudata) +static void amd_perf_ctl_reset(unsigned int cpu) { - struct cppc_perf_caps cppc_perf; + wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); +} - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +{ + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + /* user disabled or not detected */ + if (!amd_pstate_prefcore) + return; + + cpudata->hw_prefcore = true; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. 
+ */ + sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); - /* Switch to khz */ - return cppc_perf.nominal_freq * 1000; + schedule_work(&sched_prefcore_work); } -static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) +static void amd_pstate_update_limits(unsigned int cpu) { - struct cppc_perf_caps cppc_perf; - u32 lowest_nonlinear_freq, lowest_nonlinear_perf, - nominal_freq, nominal_perf; - u64 lowest_nonlinear_ratio; + struct cpufreq_policy *policy = NULL; + struct amd_cpudata *cpudata; + u32 prev_high = 0, cur_high = 0; + int ret; + bool highest_perf_changed = false; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + if (!amd_pstate_prefcore) + return; - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); + policy = cpufreq_cpu_get(cpu); + if (!policy) + return; - lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + cpudata = policy->driver_data; - lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); + guard(mutex)(&amd_pstate_driver_lock); - lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; + ret = amd_get_highest_perf(cpu, &cur_high); + if (ret) { + cpufreq_cpu_put(policy); + return; + } + + prev_high = READ_ONCE(cpudata->prefcore_ranking); + highest_perf_changed = (prev_high != cur_high); + if (highest_perf_changed) { + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) + sched_set_itmt_core_prio((int)cur_high, cpu); + } + cpufreq_cpu_put(policy); + + if (!highest_perf_changed) + cpufreq_update_policy(cpu); - /* Switch to khz */ - return lowest_nonlinear_freq * 1000; } -static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) +/* + * Get pstate transition delay time from ACPI tables that firmware set + * instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) { - struct amd_cpudata *cpudata = policy->driver_data; - int ret; + u32 transition_delay_ns; - if (!cpudata->boost_supported) { - pr_err("Boost mode is not supported by this processor or SBIOS\n"); - return -EINVAL; + transition_delay_ns = cppc_get_transition_latency(cpu); + if (transition_delay_ns == CPUFREQ_ETERNAL) { + if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC)) + return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; + else + return AMD_PSTATE_TRANSITION_DELAY; } - if (state) - policy->cpuinfo.max_freq = cpudata->max_freq; - else - policy->cpuinfo.max_freq = cpudata->nominal_freq; + return transition_delay_ns / NSEC_PER_USEC; +} - policy->max = policy->cpuinfo.max_freq; +/* + * Get pstate transition latency value from ACPI tables that firmware + * set instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_latency(unsigned int cpu) +{ + u32 transition_latency; - ret = freq_qos_update_request(&cpudata->req[1], - policy->cpuinfo.max_freq); - if (ret < 0) - return ret; + transition_latency = cppc_get_transition_latency(cpu); + if (transition_latency == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_LATENCY; - return 0; + return transition_latency; } -static void amd_pstate_boost_init(struct amd_cpudata *cpudata) +/* + * amd_pstate_init_freq: Initialize the max_freq, min_freq, + * nominal_freq and lowest_nonlinear_freq for + * the @cpudata object. + * + * Requires: highest_perf, lowest_perf, nominal_perf and + * lowest_nonlinear_perf members of @cpudata to be + * initialized. 
+ * + * Returns 0 on success, non-zero value on failure. + */ +static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { - u32 highest_perf, nominal_perf; + int ret; + u32 min_freq, max_freq; + u32 highest_perf, nominal_perf, nominal_freq; + u32 lowest_nonlinear_perf, lowest_nonlinear_freq; + struct cppc_perf_caps cppc_perf; + + ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + if (quirks && quirks->lowest_freq) + min_freq = quirks->lowest_freq; + else + min_freq = cppc_perf.lowest_freq; + + if (quirks && quirks->nominal_freq) + nominal_freq = quirks->nominal_freq; + else + nominal_freq = cppc_perf.nominal_freq; highest_perf = READ_ONCE(cpudata->highest_perf); nominal_perf = READ_ONCE(cpudata->nominal_perf); + max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf); - if (highest_perf <= nominal_perf) - return; + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf); + WRITE_ONCE(cpudata->min_freq, min_freq * 1000); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); + WRITE_ONCE(cpudata->max_freq, max_freq * 1000); + + /** + * Below values need to be initialized correctly, otherwise driver will fail to load + * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf + * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] + * Check _CPC in ACPI table objects if any values are incorrect + */ + if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { + pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", + min_freq, max_freq, nominal_freq); + return -EINVAL; + } - cpudata->boost_supported = true; - current_pstate_driver->boost_enabled = true; -} + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { + pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", + lowest_nonlinear_freq, min_freq, nominal_freq); + return -EINVAL; + } -static void amd_perf_ctl_reset(unsigned int cpu) -{ - wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); + return 0; } static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + int min_freq, max_freq, ret; struct device *dev; struct amd_cpudata *cpudata; @@ -731,20 +983,21 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + amd_pstate_init_prefcore(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); - ret = -EINVAL; + ret = amd_pstate_init_freq(cpudata); + if (ret) goto free_cpudata1; - } - policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; - policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + ret = amd_pstate_init_boost_support(cpudata); + if (ret) + goto free_cpudata1; + + min_freq = READ_ONCE(cpudata->min_freq); + max_freq = READ_ONCE(cpudata->max_freq); + + policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); + policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); policy->min = min_freq; 
policy->max = max_freq; @@ -752,14 +1005,16 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = min_freq; policy->cpuinfo.max_freq = max_freq; + policy->boost_enabled = READ_ONCE(cpudata->boost_supported); + /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - if (boot_cpu_has(X86_FEATURE_CPPC)) + if (cpu_feature_enabled(X86_FEATURE_CPPC)) policy->fast_switch_possible = true; ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], - FREQ_QOS_MIN, policy->cpuinfo.min_freq); + FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); goto free_cpudata1; @@ -772,17 +1027,11 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata2; } - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; cpudata->max_limit_freq = max_freq; cpudata->min_limit_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; policy->driver_data = cpudata; - amd_pstate_boost_init(cpudata); if (!current_pstate_driver->adjust_perf) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; @@ -795,7 +1044,7 @@ free_cpudata1: return ret; } -static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) +static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; @@ -803,15 +1052,13 @@ static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) freq_qos_remove_request(&cpudata->req[0]); policy->fast_switch_possible = false; kfree(cpudata); - - return 0; } static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) { int ret; - ret = amd_pstate_enable(true); + ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd-pstate during resume, return %d\n", ret); @@ -822,7 +1069,7 @@ static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) { int ret; - ret = amd_pstate_enable(false); + ret = amd_pstate_cppc_enable(false); if (ret) pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); @@ -842,7 +1089,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, int max_freq; struct amd_cpudata *cpudata = policy->driver_data; - max_freq = amd_get_max_freq(cpudata); + max_freq = READ_ONCE(cpudata->max_freq); if (max_freq < 0) return max_freq; @@ -855,7 +1102,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli int freq; struct amd_cpudata *cpudata = policy->driver_data; - freq = amd_get_lowest_nonlinear_freq(cpudata); + freq = READ_ONCE(cpudata->lowest_nonlinear_freq); if (freq < 0) return freq; @@ -877,6 +1124,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->prefcore_ranking); + + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -899,7 +1168,6 @@ static 
ssize_t show_energy_performance_available_preferences( static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { - struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; @@ -911,11 +1179,11 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - mutex_lock(&amd_pstate_limits_lock); - ret = amd_pstate_set_energy_pref_index(cpudata, ret); - mutex_unlock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); + + ret = amd_pstate_set_energy_pref_index(policy, ret); - return ret ?: count; + return ret ? ret : count; } static ssize_t show_energy_performance_preference( @@ -924,37 +1192,79 @@ static ssize_t show_energy_performance_preference( struct amd_cpudata *cpudata = policy->driver_data; int preference; - preference = amd_pstate_get_energy_pref_index(cpudata); - if (preference < 0) - return preference; + switch (cpudata->epp_cached) { + case AMD_CPPC_EPP_PERFORMANCE: + preference = EPP_INDEX_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_PERFORMANCE: + preference = EPP_INDEX_BALANCE_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_POWERSAVE: + preference = EPP_INDEX_BALANCE_POWERSAVE; + break; + case AMD_CPPC_EPP_POWERSAVE: + preference = EPP_INDEX_POWERSAVE; + break; + default: + return -EINVAL; + } return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); } static void amd_pstate_driver_cleanup(void) { - amd_pstate_enable(false); + amd_pstate_cppc_enable(false); cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } +static int amd_pstate_set_driver(int mode_idx) +{ + if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { + cppc_state = mode_idx; + if (cppc_state == AMD_PSTATE_DISABLE) + pr_info("driver is explicitly disabled\n"); + + if (cppc_state == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) + current_pstate_driver = &amd_pstate_driver; + + return 0; + } + + return -EINVAL; +} + static int amd_pstate_register_driver(int mode) { int ret; - if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED) - current_pstate_driver = &amd_pstate_driver; - else if (mode == AMD_PSTATE_ACTIVE) - current_pstate_driver = &amd_pstate_epp_driver; - else - return -EINVAL; + ret = amd_pstate_set_driver(mode); + if (ret) + return ret; cppc_state = mode; + + ret = amd_pstate_cppc_enable(true); + if (ret) { + pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", + ret); + amd_pstate_driver_cleanup(); + return ret; + } + + /* at least one CPU supports CPB */ + current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); + ret = cpufreq_register_driver(current_pstate_driver); if (ret) { amd_pstate_driver_cleanup(); return ret; } + return 0; } @@ -971,7 +1281,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode) cppc_state = mode; - if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) + if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) return 0; for_each_present_cpu(cpu) { @@ -1031,7 +1341,7 @@ static ssize_t amd_pstate_show_status(char *buf) return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); } -static int amd_pstate_update_status(const char *buf, size_t size) +int amd_pstate_update_status(const char *buf, size_t size) { int mode_idx; @@ -1048,17 +1358,15 @@ static int amd_pstate_update_status(const char *buf, size_t size) 
return 0; } +EXPORT_SYMBOL_GPL(amd_pstate_update_status); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { - ssize_t ret; - mutex_lock(&amd_pstate_driver_lock); - ret = amd_pstate_show_status(buf); - mutex_unlock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); - return ret; + return amd_pstate_show_status(buf); } static ssize_t status_store(struct device *a, struct device_attribute *b, @@ -1067,25 +1375,35 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, char *p = memchr(buf, '\n', count); int ret; - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_update_status(buf, p ? p - buf : count); - mutex_unlock(&amd_pstate_driver_lock); return ret < 0 ? ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, NULL, }; @@ -1093,6 +1411,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1100,6 +1420,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, NULL }; @@ -1130,7 +1451,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + int min_freq, max_freq, ret; struct amd_cpudata *cpudata; struct device *dev; u64 value; @@ -1149,52 +1470,50 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; cpudata->cpu = policy->cpu; - cpudata->epp_policy = 0; ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); - ret = -EINVAL; + amd_pstate_init_prefcore(cpudata); + + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; + + ret = amd_pstate_init_boost_support(cpudata); + if (ret) goto free_cpudata1; - } + + min_freq = READ_ONCE(cpudata->min_freq); + max_freq = READ_ONCE(cpudata->max_freq); policy->cpuinfo.min_freq = min_freq; policy->cpuinfo.max_freq = max_freq; /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; - 
cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; - policy->driver_data = cpudata; - cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); - policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; + policy->boost_enabled = READ_ONCE(cpudata->boost_supported); + /* * Set the policy to provide a valid fallback value in case * the default cpufreq governor is neither powersave nor performance. */ if (amd_pstate_acpi_pm_profile_server() || - amd_pstate_acpi_pm_profile_undefined()) + amd_pstate_acpi_pm_profile_undefined()) { policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else + cpudata->epp_default = amd_pstate_get_epp(cpudata); + } else { policy->policy = CPUFREQ_POLICY_POWERSAVE; + cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; + } - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); if (ret) return ret; @@ -1205,7 +1524,11 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_cap1_cached, value); } - amd_pstate_boost_init(cpudata); + ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + if (ret) + return ret; + + current_pstate_driver->adjust_perf = NULL; return 0; @@ -1214,75 +1537,45 @@ free_cpudata1: return ret; } -static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) +static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) { + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata) { + kfree(cpudata); + policy->driver_data = NULL; + } + pr_debug("CPU %d exiting\n", policy->cpu); - return 0; } -static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) +static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf, min_limit_perf, max_limit_perf; - u64 value; - s16 epp; - - max_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); - max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); - min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); - - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + u32 epp; - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - value = READ_ONCE(cpudata->cppc_req_cached); - - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_perf = max_perf; - - /* Initial min/max values for CPPC Performance Controls Register */ - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); - - /* CPPC EPP feature require to set zero to the desire perf bit */ - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(0); - - cpudata->epp_policy = cpudata->policy; - - /* Get BIOS pre-defined epp value */ - epp = amd_pstate_get_epp(cpudata, value); - if (epp < 0) { - /** - * This return value can only be negative for shared_memory - * systems where EPP register read/write not supported. 
- */ - return; - } + amd_pstate_update_min_max_limit(policy); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) epp = 0; + else + epp = READ_ONCE(cpudata->epp_cached); - /* Set initial EPP value */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, + cpudata->min_limit_perf, + cpudata->max_limit_perf, + policy->boost_enabled); } - WRITE_ONCE(cpudata->cppc_req_cached, value); - amd_pstate_set_epp(cpudata, epp); + return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, epp, false); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + int ret; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -1292,96 +1585,76 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) cpudata->policy = policy->policy; - amd_pstate_epp_update_limit(policy); + ret = amd_pstate_epp_update_limit(policy); + if (ret) + return ret; + + /* + * policy->cur is never updated with the amd_pstate_epp driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; return 0; } -static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { - struct cppc_perf_ctrls perf_ctrls; - u64 value, max_perf; + struct amd_cpudata *cpudata = policy->driver_data; + u64 max_perf; int ret; - ret = amd_pstate_enable(true); + ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - value = READ_ONCE(cpudata->cppc_req_cached); max_perf = READ_ONCE(cpudata->highest_perf); - if (boot_cpu_has(X86_FEATURE_CPPC)) { - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); - cppc_set_perf(cpudata->cpu, &perf_ctrls); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + cpudata->epp_cached, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), + max_perf, policy->boost_enabled); } + + return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + int ret; pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - if (cppc_state == AMD_PSTATE_ACTIVE) { - amd_pstate_epp_reenable(cpudata); - cpudata->suspended = false; - } + ret = amd_pstate_epp_reenable(policy); + if (ret) + return ret; + cpudata->suspended = false; return 0; } -static void amd_pstate_epp_offline(struct cpufreq_policy *policy) -{ - struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; - int min_perf; - u64 value; - - min_perf = READ_ONCE(cpudata->lowest_perf); - value = READ_ONCE(cpudata->cppc_req_cached); - - mutex_lock(&amd_pstate_limits_lock); - if (boot_cpu_has(X86_FEATURE_CPPC)) { - cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; - - /* Set max perf same as min perf */ - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(min_perf); - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.desired_perf = 0; - perf_ctrls.max_perf = min_perf; - 
perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); - cppc_set_perf(cpudata->cpu, &perf_ctrls); - } - mutex_unlock(&amd_pstate_limits_lock); -} - static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - - pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); + int min_perf; if (cpudata->suspended) return 0; - if (cppc_state == AMD_PSTATE_ACTIVE) - amd_pstate_epp_offline(policy); + min_perf = READ_ONCE(cpudata->lowest_perf); - return 0; -} + guard(mutex)(&amd_pstate_limits_lock); -static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) -{ - cpufreq_verify_within_cpu_limits(policy); - pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); - return 0; + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, + min_perf, min_perf, policy->boost_enabled); + } + + return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, false); } static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) @@ -1397,7 +1670,7 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) cpudata->suspended = true; /* disable CPPC in lowlevel firmware */ - ret = amd_pstate_enable(false); + ret = amd_pstate_cppc_enable(false); if (ret) pr_err("failed to suspend, return %d\n", ret); @@ -1409,12 +1682,10 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(cpudata); - - mutex_unlock(&amd_pstate_limits_lock); + amd_pstate_epp_reenable(policy); cpudata->suspended = false; } @@ -1432,13 +1703,14 @@ static struct cpufreq_driver amd_pstate_driver = { .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate", .attr = amd_pstate_attr, }; static struct cpufreq_driver amd_pstate_epp_driver = { .flags = CPUFREQ_CONST_LOOPS, - .verify = amd_pstate_epp_verify_policy, + .verify = amd_pstate_verify, .setpolicy = amd_pstate_epp_set_policy, .init = amd_pstate_epp_cpu_init, .exit = amd_pstate_epp_cpu_exit, @@ -1446,27 +1718,64 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, + .update_limits = amd_pstate_update_limits, + .set_boost = amd_pstate_set_boost, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; -static int __init amd_pstate_set_driver(int mode_idx) +/* + * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. + * show the debug message that helps to check if the CPU has CPPC support for loading issue. 
+ */ +static bool amd_cppc_supported(void) { - if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { - cppc_state = mode_idx; - if (cppc_state == AMD_PSTATE_DISABLE) - pr_info("driver is explicitly disabled\n"); + struct cpuinfo_x86 *c = &cpu_data(0); + bool warn = false; - if (cppc_state == AMD_PSTATE_ACTIVE) - current_pstate_driver = &amd_pstate_epp_driver; - - if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) - current_pstate_driver = &amd_pstate_driver; + if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { + pr_debug_once("CPPC feature is not supported by the processor\n"); + return false; + } - return 0; + /* + * If the CPPC feature is disabled in the BIOS for processors + * that support MSR-based CPPC, the AMD Pstate driver may not + * function correctly. + * + * For such processors, check the CPPC flag and display a + * warning message if the platform supports CPPC. + * + * Note: The code check below will not abort the driver + * registration process because of the code is added for + * debugging purposes. Besides, it may still be possible for + * the driver to work using the shared-memory mechanism. + */ + if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { + switch (c->x86_model) { + case 0x60 ... 0x6F: + case 0x80 ... 0xAF: + warn = true; + break; + } + } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || + cpu_feature_enabled(X86_FEATURE_ZEN4)) { + switch (c->x86_model) { + case 0x10 ... 0x1F: + case 0x40 ... 0xAF: + warn = true; + break; + } + } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { + warn = true; + } } - return -EINVAL; + if (warn) + pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" + "Please enable it if your BIOS has the CPPC option.\n"); + return true; } static int __init amd_pstate_init(void) @@ -1477,6 +1786,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* show debug message only if CPPC is not supported */ + if (!amd_cppc_supported()) + return -EOPNOTSUPP; + + /* show warning message when BIOS broken or ACPI disabled */ if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); return -ENODEV; @@ -1486,55 +1800,59 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; - switch (cppc_state) { - case AMD_PSTATE_UNDEFINED: + quirks = NULL; + + /* check if this machine need CPPC quirks */ + dmi_check_system(amd_pstate_quirks_table); + + /* + * determine the driver mode from the command line or kernel config. + * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. + * command line options will override the kernel config settings. + */ + + if (cppc_state == AMD_PSTATE_UNDEFINED) { /* Disable on the following configs by default: * 1. Undefined platforms - * 2. Server platforms - * 3. Shared memory designs + * 2. Server platforms with CPUs older than Family 0x1A. 
*/ if (amd_pstate_acpi_pm_profile_undefined() || - amd_pstate_acpi_pm_profile_server() || - !boot_cpu_has(X86_FEATURE_CPPC)) { + (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) { pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } - ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); - if (ret) - return ret; - break; - case AMD_PSTATE_DISABLE: + /* get driver mode from kernel config option [1:4] */ + cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; + } + + if (cppc_state == AMD_PSTATE_DISABLE) { + pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; - case AMD_PSTATE_PASSIVE: - case AMD_PSTATE_ACTIVE: - case AMD_PSTATE_GUIDED: - break; - default: - return -EINVAL; } /* capability check */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); - if (cppc_state != AMD_PSTATE_ACTIVE) - current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; } else { pr_debug("AMD CPPC shared memory based functionality is supported\n"); - static_call_update(amd_pstate_enable, cppc_enable); - static_call_update(amd_pstate_init_perf, cppc_init_perf); - static_call_update(amd_pstate_update_perf, cppc_update_perf); + static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); + static_call_update(amd_pstate_init_perf, shmem_init_perf); + static_call_update(amd_pstate_update_perf, shmem_update_perf); + static_call_update(amd_pstate_get_epp, shmem_get_epp); + static_call_update(amd_pstate_set_epp, shmem_set_epp); } - /* enable amd pstate feature */ - ret = amd_pstate_enable(true); - if (ret) { - pr_err("failed to enable with return %d\n", ret); - return ret; + if (amd_pstate_prefcore) { + ret = amd_detect_prefcore(&amd_pstate_prefcore); + if (ret) + return ret; } - ret = cpufreq_register_driver(current_pstate_driver); - if (ret) + ret = amd_pstate_register_driver(cppc_state); + if (ret) { pr_err("failed to register with return %d\n", ret); + return ret; + } dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { @@ -1550,6 +1868,7 @@ static int __init amd_pstate_init(void) global_attr_free: cpufreq_unregister_driver(current_pstate_driver); + amd_pstate_cppc_enable(false); return ret; } device_initcall(amd_pstate_init); @@ -1567,7 +1886,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); |
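The patch drops the old AMD_CPPC_*_PERF() shift macros in favor of GENMASK()/FIELD_PREP()/FIELD_GET() from <linux/bitfield.h> when building the MSR_AMD_CPPC_REQ value. Below is a minimal user-space sketch of that field layout; the masks are copied from the patch, while GENMASK64(), field_prep() and field_get() are stand-ins for the kernel helpers, added only for illustration.

#include <stdint.h>
#include <stdio.h>

/* Field layout of MSR_AMD_CPPC_REQ as defined in the patch. */
#define GENMASK64(h, l)  (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
#define AMD_CPPC_MAX_PERF_MASK  GENMASK64(7, 0)
#define AMD_CPPC_MIN_PERF_MASK  GENMASK64(15, 8)
#define AMD_CPPC_DES_PERF_MASK  GENMASK64(23, 16)
#define AMD_CPPC_EPP_PERF_MASK  GENMASK64(31, 24)

/* Stand-ins for the kernel's FIELD_PREP()/FIELD_GET(). */
static inline uint64_t field_prep(uint64_t mask, uint64_t val)
{
	return (val << __builtin_ctzll(mask)) & mask;
}

static inline uint64_t field_get(uint64_t mask, uint64_t reg)
{
	return (reg & mask) >> __builtin_ctzll(mask);
}

int main(void)
{
	uint64_t req = 0;

	/* Pack a request the way msr_update_perf() does in the patch. */
	req &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
		 AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
	req |= field_prep(AMD_CPPC_MAX_PERF_MASK, 228);
	req |= field_prep(AMD_CPPC_MIN_PERF_MASK, 85);
	req |= field_prep(AMD_CPPC_DES_PERF_MASK, 0);
	req |= field_prep(AMD_CPPC_EPP_PERF_MASK, 0x80);

	printf("req = %#llx, epp = %llu\n",
	       (unsigned long long)req,
	       (unsigned long long)field_get(AMD_CPPC_EPP_PERF_MASK, req));
	return 0;
}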
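All perf and EPP operations are now routed through DEFINE_STATIC_CALL() defaults (the msr_* callbacks), and amd_pstate_init() retargets them to the shmem_* variants when X86_FEATURE_CPPC is absent. static_call() is a kernel-only facility; the sketch below uses ordinary function pointers purely to show the dispatch shape, with simplified stand-in implementations.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the two backends selected in amd_pstate_init(). */
static int msr_cppc_enable(bool enable)
{
	printf("enable CPPC via MSR: %d\n", enable);
	return 0;
}

static int shmem_cppc_enable(bool enable)
{
	printf("enable CPPC via ACPI shared memory: %d\n", enable);
	return 0;
}

/* Function-pointer analogue of DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable). */
static int (*amd_pstate_cppc_enable_cb)(bool) = msr_cppc_enable;

int main(void)
{
	bool has_cppc_msr = false;	/* stands in for cpu_feature_enabled(X86_FEATURE_CPPC) */

	/* The init path swaps in the shared-memory backend when the MSR interface is absent. */
	if (!has_cppc_msr)
		amd_pstate_cppc_enable_cb = shmem_cppc_enable;

	return amd_pstate_cppc_enable_cb(true);
}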
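The new amd_pstate_init_freq() derives max_freq and lowest_nonlinear_freq from the CPPC performance levels once at init, instead of recomputing them on every sysfs read, and rejects implausible values so the driver fails to load rather than run with a broken _CPC table. A worked sketch of that arithmetic, using made-up example values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example CPPC values; real ones come from _CPC or MSR_AMD_CPPC_CAP1. */
	uint32_t highest_perf = 228;
	uint32_t nominal_perf = 166;
	uint32_t lowest_nonlinear_perf = 86;
	uint32_t nominal_freq = 2600;	/* MHz, may be overridden by a quirk */
	uint32_t min_freq = 550;	/* MHz, may be overridden by a quirk */

	/* max_freq = highest_perf * nominal_freq / nominal_perf */
	uint32_t max_freq = (uint64_t)highest_perf * nominal_freq / nominal_perf;
	/* lowest_nonlinear_freq scales nominal_freq the same way */
	uint32_t lowest_nonlinear_freq =
		(uint64_t)nominal_freq * lowest_nonlinear_perf / nominal_perf;

	/* Sanity checks mirroring the ones that make the driver refuse to load. */
	if (min_freq == 0 || max_freq == 0 || nominal_freq == 0 || min_freq > max_freq)
		return 1;
	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq)
		return 1;

	printf("max %u MHz, lowest nonlinear %u MHz (cached in kHz: %u / %u)\n",
	       max_freq, lowest_nonlinear_freq,
	       max_freq * 1000, lowest_nonlinear_freq * 1000);
	return 0;
}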
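amd_pstate_update_min_max_limit() converts the policy's frequency limits back into perf values by scaling against highest_perf/max_freq, and with the performance governor it raises the floor to nominal_perf. A small sketch of that conversion, with illustrative numbers:

#include <stdint.h>
#include <stdio.h>

static uint32_t freq_to_perf(uint32_t freq_khz, uint32_t highest_perf,
			     uint32_t max_freq_khz)
{
	/* perf = freq * highest_perf / max_freq, as in the patch */
	return (uint64_t)freq_khz * highest_perf / max_freq_khz;
}

int main(void)
{
	uint32_t highest_perf = 228, nominal_perf = 166;
	uint32_t max_freq = 3571000;			/* kHz */
	uint32_t policy_min = 1346000, policy_max = 3571000;
	int performance_governor = 1;

	uint32_t max_limit_perf = freq_to_perf(policy_max, highest_perf, max_freq);
	uint32_t min_limit_perf = freq_to_perf(policy_min, highest_perf, max_freq);

	/* With CPUFREQ_POLICY_PERFORMANCE the floor becomes min(nominal, max_limit). */
	if (performance_governor)
		min_limit_perf = nominal_perf < max_limit_perf ?
				 nominal_perf : max_limit_perf;

	printf("min_limit_perf=%u max_limit_perf=%u\n",
	       min_limit_perf, max_limit_perf);
	return 0;
}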
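amd_pstate_get_transition_delay_us() now prefers the firmware-reported latency and only falls back to the hardcoded defaults (600 µs on parts with X86_FEATURE_AMD_FAST_CPPC, 1000 µs otherwise) when the ACPI layer reports CPUFREQ_ETERNAL. A sketch of that decision; cppc_get_transition_latency() is replaced by a hypothetical stub here.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CPUFREQ_ETERNAL				((uint32_t)-1)
#define NSEC_PER_USEC				1000U
#define AMD_PSTATE_TRANSITION_DELAY		1000	/* us */
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY	600	/* us */

/* Hypothetical stub for cppc_get_transition_latency(); returns nanoseconds. */
static uint32_t fake_cppc_get_transition_latency(unsigned int cpu)
{
	(void)cpu;
	return CPUFREQ_ETERNAL;	/* pretend firmware did not provide a value */
}

static uint32_t get_transition_delay_us(unsigned int cpu, bool fast_cppc)
{
	uint32_t delay_ns = fake_cppc_get_transition_latency(cpu);

	if (delay_ns == CPUFREQ_ETERNAL)
		return fast_cppc ? AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY
				 : AMD_PSTATE_TRANSITION_DELAY;

	return delay_ns / NSEC_PER_USEC;
}

int main(void)
{
	printf("transition delay: %u us\n", get_transition_delay_us(0, true));
	return 0;
}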
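show_energy_performance_preference() now maps the cached raw EPP value straight to a preference index instead of re-reading the register. The sketch below reproduces that mapping with the EPP constants added at the top of the file; the returned strings approximate the driver's energy_perf_strings table and are shown only for illustration.

#include <stdio.h>

/* Raw EPP values defined in the patch. */
#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF

static const char *epp_to_string(unsigned int epp)
{
	switch (epp) {
	case AMD_CPPC_EPP_PERFORMANCE:		return "performance";
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:	return "balance_performance";
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:	return "balance_power";
	case AMD_CPPC_EPP_POWERSAVE:		return "power";
	default:				return NULL;	/* -EINVAL in the driver */
	}
}

int main(void)
{
	printf("%s\n", epp_to_string(AMD_CPPC_EPP_BALANCE_PERFORMANCE));
	return 0;
}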