Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r--	drivers/cpufreq/intel_pstate.c	926
1 file changed, 645 insertions(+), 281 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index dbbf299f4219..64587d318267 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -16,6 +16,7 @@
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched/cpufreq.h>
+#include <linux/sched/smt.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
@@ -27,6 +28,7 @@
#include <linux/pm_qos.h>
#include <linux/bitfield.h>
#include <trace/events/power.h>
+#include <linux/units.h>
#include <asm/cpu.h>
#include <asm/div64.h>
@@ -173,7 +175,6 @@ struct vid_data {
* based on the MSR_IA32_MISC_ENABLE value and whether or
* not the maximum reported turbo P-state is different from
* the maximum reported non-turbo one.
- * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq.
* @min_perf_pct: Minimum capacity limit in percent of the maximum turbo
* P-state capacity.
* @max_perf_pct: Maximum capacity limit in percent of the maximum turbo
@@ -182,7 +183,6 @@ struct vid_data {
struct global_params {
bool no_turbo;
bool turbo_disabled;
- bool turbo_disabled_mf;
int max_perf_pct;
int min_perf_pct;
};
@@ -213,13 +213,15 @@ struct global_params {
* @epp_policy: Last saved policy used to set EPP/EPB
* @epp_default: Power on default HWP energy performance
* preference/bias
- * @epp_cached Cached HWP energy-performance preference value
+ * @epp_cached: Cached HWP energy-performance preference value
* @hwp_req_cached: Cached value of the last HWP Request MSR
* @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
* @last_io_update: Last time when IO wake flag was set
+ * @capacity_perf: Highest perf used for scale invariance
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
+ * @pd_registered: Set when a perf domain is registered for this CPU.
* @hwp_notify_work: workqueue for HWP notifications.
*
* This structure stores per CPU instance data for all CPUs.
@@ -255,9 +257,13 @@ struct cpudata {
u64 hwp_req_cached;
u64 hwp_cap_cached;
u64 last_io_update;
+ unsigned int capacity_perf;
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
+#ifdef CONFIG_ENERGY_MODEL
+ bool pd_registered;
+#endif
struct delayed_work hwp_notify_work;
};
@@ -292,22 +298,25 @@ struct pstate_funcs {
static struct pstate_funcs pstate_funcs __read_mostly;
-static int hwp_active __read_mostly;
-static int hwp_mode_bdw __read_mostly;
-static bool per_cpu_limits __read_mostly;
+static bool hwp_active __ro_after_init;
+static int hwp_mode_bdw __ro_after_init;
+static bool per_cpu_limits __ro_after_init;
+static bool hwp_forced __ro_after_init;
static bool hwp_boost __read_mostly;
-static bool hwp_forced __read_mostly;
+static bool hwp_is_hybrid;
static struct cpufreq_driver *intel_pstate_driver __read_mostly;
-#define HYBRID_SCALING_FACTOR 78741
+#define INTEL_PSTATE_CORE_SCALING 100000
+#define HYBRID_SCALING_FACTOR_ADL 78741
#define HYBRID_SCALING_FACTOR_MTL 80000
+#define HYBRID_SCALING_FACTOR_LNL 86957
-static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR;
+static int hybrid_scaling_factor;
static inline int core_get_scaling(void)
{
- return 100000;
+ return INTEL_PSTATE_CORE_SCALING;
}
#ifdef CONFIG_ACPI
@@ -357,15 +366,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
int ret;
ret = cppc_get_perf_caps(cpu, &cppc_perf);
- if (ret)
- return;
-
/*
- * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
- * In this case we can't use CPPC.highest_perf to enable ITMT.
- * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
+ * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0].
+ *
+ * Also, on some systems with overclocking enabled, CPPC.highest_perf is
+ * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT.
+ * Fall back to MSR_HWP_CAPABILITIES then too.
*/
- if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+ if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF)
cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
/*
@@ -412,18 +420,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
static int intel_pstate_cppc_get_scaling(int cpu)
{
struct cppc_perf_caps cppc_perf;
- int ret;
-
- ret = cppc_get_perf_caps(cpu, &cppc_perf);
/*
- * If the nominal frequency and the nominal performance are not
- * zero and the ratio between them is not 100, return the hybrid
- * scaling factor.
+ * Compute the perf-to-frequency scaling factor for the given CPU if
+ * possible, unless it would be 0.
*/
- if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq &&
- cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq)
- return hybrid_scaling_factor;
+ if (!cppc_get_perf_caps(cpu, &cppc_perf) &&
+ cppc_perf.nominal_perf && cppc_perf.nominal_freq)
+ return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ,
+ cppc_perf.nominal_perf);
return core_get_scaling();
}
@@ -594,12 +599,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
}
-static inline void update_turbo_state(void)
+static bool turbo_is_disabled(void)
{
u64 misc_en;
- rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
- global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ if (!cpu_feature_enabled(X86_FEATURE_IDA))
+ return true;
+
+ rdmsrq(MSR_IA32_MISC_ENABLE, misc_en);
+
+ return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}
static int min_perf_pct_min(void)
@@ -619,7 +628,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
if (!boot_cpu_has(X86_FEATURE_EPB))
return -ENXIO;
- ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ ret = rdmsrq_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
if (ret)
return (s16)ret;
@@ -636,7 +645,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
* MSR_HWP_REQUEST, so need to read and get EPP.
*/
if (!hwp_req_data) {
- epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
+ epp = rdmsrq_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
&hwp_req_data);
if (epp)
return epp;
@@ -658,12 +667,12 @@ static int intel_pstate_set_epb(int cpu, s16 pref)
if (!boot_cpu_has(X86_FEATURE_EPB))
return -ENXIO;
- ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
if (ret)
return ret;
epb = (epb & ~0x0f) | pref;
- wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
+ wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
return 0;
}
@@ -761,7 +770,7 @@ static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
* function, so it cannot run in parallel with the update below.
*/
WRITE_ONCE(cpu->hwp_req_cached, value);
- ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ ret = wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
if (!ret)
cpu->epp_cached = epp;
@@ -915,7 +924,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
if (ratio <= 0) {
u64 cap;
- rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
+ rdmsrq_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
ratio = HWP_GUARANTEED_PERF(cap);
}
@@ -935,11 +944,276 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
NULL,
};
+static bool no_cas __ro_after_init;
+
+static struct cpudata *hybrid_max_perf_cpu __read_mostly;
+/*
+ * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
+ * and the x86 arch scale-invariance information from concurrent updates.
+ */
+static DEFINE_MUTEX(hybrid_capacity_lock);
+
+#ifdef CONFIG_ENERGY_MODEL
+#define HYBRID_EM_STATE_COUNT 4
+
+static int hybrid_active_power(struct device *dev, unsigned long *power,
+ unsigned long *freq)
+{
+ /*
+ * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
+ * of the maximum capacity such that two CPUs of the same type will be
+ * regarded as equally attractive if the utilization of each of them
+ * falls into the same bin, which should prevent tasks from being
+ * migrated between them too often.
+ *
+ * For this purpose, return the "frequency" of 2 for the first
+ * performance level and otherwise leave the value set by the caller.
+ */
+ if (!*freq)
+ *freq = 2;
+
+ /* No power information. */
+ *power = EM_MAX_POWER;
+
+ return 0;
+}
+
+static int hybrid_get_cost(struct device *dev, unsigned long freq,
+ unsigned long *cost)
+{
+ struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
+ struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id);
+
+ /*
+ * The smaller the perf-to-frequency scaling factor, the larger the IPC
+ * ratio between the given CPU and the least capable CPU in the system.
+ * Regard that IPC ratio as the primary cost component and assume that
+ * the scaling factors for different CPU types will differ by at least
+ * 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
+ *
+ * Add the freq value to the cost, so that the cost of running on CPUs
+ * of the same type in different "utilization bins" is different.
+ */
+ *cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
+ /*
+ * Increase the cost slightly for CPUs able to access L3 to avoid
+ * touching it in case some other CPUs of the same type can do the work
+ * without it.
+ */
+ if (cacheinfo) {
+ unsigned int i;
+
+ /* Check if L3 cache is there. */
+ for (i = 0; i < cacheinfo->num_leaves; i++) {
+ if (cacheinfo->info_list[i].level == 3) {
+ *cost += 2;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static bool hybrid_register_perf_domain(unsigned int cpu)
+{
+ static const struct em_data_callback cb
+ = EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost);
+ struct cpudata *cpudata = all_cpu_data[cpu];
+ struct device *cpu_dev;
+
+ /*
+ * Registering EM perf domains without enabling asymmetric CPU capacity
+ * support is not really useful and one domain should not be registered
+ * more than once.
+ */
+ if (!hybrid_max_perf_cpu || cpudata->pd_registered)
+ return false;
+
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev)
+ return false;
+
+ if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+ cpumask_of(cpu), false))
+ return false;
+
+ cpudata->pd_registered = true;
+
+ return true;
+}
+
+static void hybrid_register_all_perf_domains(void)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ hybrid_register_perf_domain(cpu);
+}
+
+static void hybrid_update_perf_domain(struct cpudata *cpu)
+{
+ if (cpu->pd_registered)
+ em_adjust_cpu_capacity(cpu->cpu);
+}
+#else /* !CONFIG_ENERGY_MODEL */
+static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; }
+static inline void hybrid_register_all_perf_domains(void) {}
+static inline void hybrid_update_perf_domain(struct cpudata *cpu) {}
+#endif /* CONFIG_ENERGY_MODEL */
+
+static void hybrid_set_cpu_capacity(struct cpudata *cpu)
+{
+ arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
+ hybrid_max_perf_cpu->capacity_perf,
+ cpu->capacity_perf,
+ cpu->pstate.max_pstate_physical);
+ hybrid_update_perf_domain(cpu);
+
+ topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));
+
+ pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
+ cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
+ cpu->pstate.max_pstate_physical);
+}
+
+static void hybrid_clear_cpu_capacity(unsigned int cpunum)
+{
+ arch_set_cpu_capacity(cpunum, 1, 1, 1, 1);
+}
+
+static void hybrid_get_capacity_perf(struct cpudata *cpu)
+{
+ if (READ_ONCE(global.no_turbo)) {
+ cpu->capacity_perf = cpu->pstate.max_pstate_physical;
+ return;
+ }
+
+ cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
+}
+
+static void hybrid_set_capacity_of_cpus(void)
+{
+ int cpunum;
+
+ for_each_online_cpu(cpunum) {
+ struct cpudata *cpu = all_cpu_data[cpunum];
+
+ if (cpu)
+ hybrid_set_cpu_capacity(cpu);
+ }
+}
+
+static void hybrid_update_cpu_capacity_scaling(void)
+{
+ struct cpudata *max_perf_cpu = NULL;
+ unsigned int max_cap_perf = 0;
+ int cpunum;
+
+ for_each_online_cpu(cpunum) {
+ struct cpudata *cpu = all_cpu_data[cpunum];
+
+ if (!cpu)
+ continue;
+
+ /*
+ * During initialization, CPU performance at full capacity needs
+ * to be determined.
+ */
+ if (!hybrid_max_perf_cpu)
+ hybrid_get_capacity_perf(cpu);
+
+ /*
+ * If hybrid_max_perf_cpu is not NULL at this point, it is
+ * being replaced, so don't take it into account when looking
+ * for the new one.
+ */
+ if (cpu == hybrid_max_perf_cpu)
+ continue;
+
+ if (cpu->capacity_perf > max_cap_perf) {
+ max_cap_perf = cpu->capacity_perf;
+ max_perf_cpu = cpu;
+ }
+ }
+
+ if (max_perf_cpu) {
+ hybrid_max_perf_cpu = max_perf_cpu;
+ hybrid_set_capacity_of_cpus();
+ } else {
+ pr_info("Found no CPUs with nonzero maximum performance\n");
+ /* Revert to the flat CPU capacity structure. */
+ for_each_online_cpu(cpunum)
+ hybrid_clear_cpu_capacity(cpunum);
+ }
+}
+
+static void __hybrid_refresh_cpu_capacity_scaling(void)
+{
+ hybrid_max_perf_cpu = NULL;
+ hybrid_update_cpu_capacity_scaling();
+}
+
+static void hybrid_refresh_cpu_capacity_scaling(void)
+{
+ guard(mutex)(&hybrid_capacity_lock);
+
+ __hybrid_refresh_cpu_capacity_scaling();
+ /*
+ * Perf domains are not registered before setting hybrid_max_perf_cpu,
+ * so register them all after setting up CPU capacity scaling.
+ */
+ hybrid_register_all_perf_domains();
+}
+
+static void hybrid_init_cpu_capacity_scaling(bool refresh)
+{
+ /* Bail out if enabling capacity-aware scheduling is prohibited. */
+ if (no_cas)
+ return;
+
+ /*
+ * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
+ * scaling has been enabled already and the driver is just changing the
+ * operation mode.
+ */
+ if (refresh) {
+ hybrid_refresh_cpu_capacity_scaling();
+ return;
+ }
+
+ /*
+ * On hybrid systems, use asym capacity instead of ITMT, but because
+ * the capacity of SMT threads is not deterministic even approximately,
+ * do not do that when SMT is in use.
+ */
+ if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) {
+ hybrid_refresh_cpu_capacity_scaling();
+ /*
+ * Disabling ITMT causes sched domains to be rebuilt to disable asym
+ * packing and enable asym capacity and EAS.
+ */
+ sched_clear_itmt_support();
+ }
+}
+
+static bool hybrid_clear_max_perf_cpu(void)
+{
+ bool ret;
+
+ guard(mutex)(&hybrid_capacity_lock);
+
+ ret = !!hybrid_max_perf_cpu;
+ hybrid_max_perf_cpu = NULL;
+
+ return ret;
+}
+
static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
u64 cap;
- rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
+ rdmsrq_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
WRITE_ONCE(cpu->hwp_cap_cached, cap);
cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
@@ -963,6 +1237,51 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
}
}
+static void hybrid_update_capacity(struct cpudata *cpu)
+{
+ unsigned int max_cap_perf;
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ if (!hybrid_max_perf_cpu)
+ goto unlock;
+
+ /*
+ * The maximum performance of the CPU may have changed, but assume
+ * that the performance of the other CPUs has not changed.
+ */
+ max_cap_perf = hybrid_max_perf_cpu->capacity_perf;
+
+ intel_pstate_get_hwp_cap(cpu);
+
+ hybrid_get_capacity_perf(cpu);
+ /* Should hybrid_max_perf_cpu be replaced by this CPU? */
+ if (cpu->capacity_perf > max_cap_perf) {
+ hybrid_max_perf_cpu = cpu;
+ hybrid_set_capacity_of_cpus();
+ goto unlock;
+ }
+
+ /* If this CPU is hybrid_max_perf_cpu, should it be replaced? */
+ if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) {
+ hybrid_update_cpu_capacity_scaling();
+ goto unlock;
+ }
+
+ hybrid_set_cpu_capacity(cpu);
+ /*
+ * If the CPU was offline to start with and it is going online for the
+ * first time, a perf domain needs to be registered for it if hybrid
+ * capacity scaling has been enabled already. In that case, sched
+ * domains need to be rebuilt to take the new perf domain into account.
+ */
+ if (hybrid_register_perf_domain(cpu->cpu))
+ em_rebuild_sched_domains();
+
+unlock:
+ mutex_unlock(&hybrid_capacity_lock);
+}
+
static void intel_pstate_hwp_set(unsigned int cpu)
{
struct cpudata *cpu_data = all_cpu_data[cpu];
@@ -976,7 +1295,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
min = max;
- rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+ rdmsrq_on_cpu(cpu, MSR_HWP_REQUEST, &value);
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
@@ -1023,7 +1342,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
}
skip_epp:
WRITE_ONCE(cpu_data->hwp_req_cached, value);
- wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata);
@@ -1070,7 +1389,23 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
if (boot_cpu_has(X86_FEATURE_HWP_EPP))
value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ if (!hybrid_max_perf_cpu) {
+ mutex_unlock(&hybrid_capacity_lock);
+
+ return;
+ }
+
+ if (hybrid_max_perf_cpu == cpu)
+ hybrid_update_cpu_capacity_scaling();
+
+ mutex_unlock(&hybrid_capacity_lock);
+
+ /* Reset the capacity of the CPU going offline to the initial value. */
+ hybrid_clear_cpu_capacity(cpu->cpu);
}
#define POWER_CTL_EE_ENABLE 1
@@ -1083,7 +1418,7 @@ static void set_power_ctl_ee_state(bool input)
u64 power_ctl;
mutex_lock(&intel_pstate_driver_lock);
- rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
+ rdmsrq(MSR_IA32_POWER_CTL, power_ctl);
if (input) {
power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE);
power_ctl_ee_state = POWER_CTL_EE_ENABLE;
@@ -1091,7 +1426,7 @@ static void set_power_ctl_ee_state(bool input)
power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
power_ctl_ee_state = POWER_CTL_EE_DISABLE;
}
- wrmsrl(MSR_IA32_POWER_CTL, power_ctl);
+ wrmsrq(MSR_IA32_POWER_CTL, power_ctl);
mutex_unlock(&intel_pstate_driver_lock);
}
@@ -1100,7 +1435,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata);
static void intel_pstate_hwp_reenable(struct cpudata *cpu)
{
intel_pstate_hwp_enable(cpu);
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
}
static int intel_pstate_suspend(struct cpufreq_policy *policy)
@@ -1151,45 +1486,55 @@ static void intel_pstate_update_policies(void)
cpufreq_update_policy(cpu);
}
-static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
- struct cpufreq_policy *policy)
+static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy,
+ struct cpudata *cpudata)
{
- policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
+ guard(cpufreq_policy_write)(policy);
+
+ if (hwp_active)
+ intel_pstate_get_hwp_cap(cpudata);
+
+ policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
+
refresh_frequency_limits(policy);
}
-static void intel_pstate_update_max_freq(unsigned int cpu)
+static bool intel_pstate_update_max_freq(struct cpudata *cpudata)
{
- struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy);
+ policy = cpufreq_cpu_get(cpudata->cpu);
if (!policy)
- return;
+ return false;
- __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+ __intel_pstate_update_max_freq(policy, cpudata);
- cpufreq_cpu_release(policy);
+ return true;
}
-static void intel_pstate_update_limits(unsigned int cpu)
+static void intel_pstate_update_limits(struct cpufreq_policy *policy)
{
- mutex_lock(&intel_pstate_driver_lock);
+ struct cpudata *cpudata = all_cpu_data[policy->cpu];
- update_turbo_state();
- /*
- * If turbo has been turned on or off globally, policy limits for
- * all CPUs need to be updated to reflect that.
- */
- if (global.turbo_disabled_mf != global.turbo_disabled) {
- global.turbo_disabled_mf = global.turbo_disabled;
- arch_set_max_freq_ratio(global.turbo_disabled);
- for_each_possible_cpu(cpu)
- intel_pstate_update_max_freq(cpu);
- } else {
- cpufreq_update_policy(cpu);
- }
+ __intel_pstate_update_max_freq(policy, cpudata);
- mutex_unlock(&intel_pstate_driver_lock);
+ hybrid_update_capacity(cpudata);
+}
+
+static void intel_pstate_update_limits_for_all(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ intel_pstate_update_max_freq(all_cpu_data[cpu]);
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ if (hybrid_max_perf_cpu)
+ __hybrid_refresh_cpu_capacity_scaling();
+
+ mutex_unlock(&hybrid_capacity_lock);
}
/************************** sysfs begin ************************/
@@ -1287,11 +1632,7 @@ static ssize_t show_no_turbo(struct kobject *kobj,
return -EAGAIN;
}
- update_turbo_state();
- if (global.turbo_disabled)
- ret = sprintf(buf, "%u\n", global.turbo_disabled);
- else
- ret = sprintf(buf, "%u\n", global.no_turbo);
+ ret = sprintf(buf, "%u\n", global.no_turbo);
mutex_unlock(&intel_pstate_driver_lock);
@@ -1302,32 +1643,39 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
const char *buf, size_t count)
{
unsigned int input;
- int ret;
+ bool no_turbo;
- ret = sscanf(buf, "%u", &input);
- if (ret != 1)
+ if (sscanf(buf, "%u", &input) != 1)
return -EINVAL;
mutex_lock(&intel_pstate_driver_lock);
if (!intel_pstate_driver) {
- mutex_unlock(&intel_pstate_driver_lock);
- return -EAGAIN;
+ count = -EAGAIN;
+ goto unlock_driver;
}
- mutex_lock(&intel_pstate_limits_lock);
+ no_turbo = !!clamp_t(int, input, 0, 1);
- update_turbo_state();
- if (global.turbo_disabled) {
- pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n");
- mutex_unlock(&intel_pstate_limits_lock);
- mutex_unlock(&intel_pstate_driver_lock);
- return -EPERM;
+ WRITE_ONCE(global.turbo_disabled, turbo_is_disabled());
+ if (global.turbo_disabled && !no_turbo) {
+ pr_notice("Turbo disabled by BIOS or unavailable on processor\n");
+ count = -EPERM;
+ if (global.no_turbo)
+ goto unlock_driver;
+ else
+ no_turbo = 1;
+ }
+
+ if (no_turbo == global.no_turbo) {
+ goto unlock_driver;
}
- global.no_turbo = clamp_t(int, input, 0, 1);
+ WRITE_ONCE(global.no_turbo, no_turbo);
+
+ mutex_lock(&intel_pstate_limits_lock);
- if (global.no_turbo) {
+ if (no_turbo) {
struct cpudata *cpu = all_cpu_data[0];
int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
@@ -1338,9 +1686,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
mutex_unlock(&intel_pstate_limits_lock);
- intel_pstate_update_policies();
- arch_set_max_freq_ratio(global.no_turbo);
+ intel_pstate_update_limits_for_all();
+ arch_set_max_freq_ratio(no_turbo);
+unlock_driver:
mutex_unlock(&intel_pstate_driver_lock);
return count;
@@ -1481,7 +1830,7 @@ static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribut
u64 power_ctl;
int enable;
- rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
+ rdmsrq(MSR_IA32_POWER_CTL, power_ctl);
enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE));
return sprintf(buf, "%d\n", !enable);
}
@@ -1618,98 +1967,97 @@ static void intel_pstate_notify_work(struct work_struct *work)
{
struct cpudata *cpudata =
container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
- struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
- if (policy) {
- intel_pstate_get_hwp_cap(cpudata);
- __intel_pstate_update_max_freq(cpudata, policy);
-
- cpufreq_cpu_release(policy);
+ if (intel_pstate_update_max_freq(cpudata)) {
+ /*
+ * The driver will not be unregistered while this function is
+ * running, so update the capacity without acquiring the driver
+ * lock.
+ */
+ hybrid_update_capacity(cpudata);
}
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
}
-static DEFINE_SPINLOCK(hwp_notify_lock);
+static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
static cpumask_t hwp_intr_enable_mask;
+#define HWP_GUARANTEED_PERF_CHANGE_STATUS BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_STATUS BIT(3)
+
void notify_hwp_interrupt(void)
{
unsigned int this_cpu = smp_processor_id();
- struct cpudata *cpudata;
+ u64 value, status_mask;
unsigned long flags;
- u64 value;
- if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+ if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY))
return;
- rdmsrl_safe(MSR_HWP_STATUS, &value);
- if (!(value & 0x01))
+ status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS;
+ if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+ status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS;
+
+ rdmsrq_safe(MSR_HWP_STATUS, &value);
+ if (!(value & status_mask))
return;
- spin_lock_irqsave(&hwp_notify_lock, flags);
+ raw_spin_lock_irqsave(&hwp_notify_lock, flags);
if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
goto ack_intr;
- /*
- * Currently we never free all_cpu_data. And we can't reach here
- * without this allocated. But for safety for future changes, added
- * check.
- */
- if (unlikely(!READ_ONCE(all_cpu_data)))
- goto ack_intr;
-
- /*
- * The free is done during cleanup, when cpufreq registry is failed.
- * We wouldn't be here if it fails on init or switch status. But for
- * future changes, added check.
- */
- cpudata = READ_ONCE(all_cpu_data[this_cpu]);
- if (unlikely(!cpudata))
- goto ack_intr;
-
- schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10));
+ schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work,
+ msecs_to_jiffies(10));
- spin_unlock_irqrestore(&hwp_notify_lock, flags);
+ raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
return;
ack_intr:
- wrmsrl_safe(MSR_HWP_STATUS, 0);
- spin_unlock_irqrestore(&hwp_notify_lock, flags);
+ wrmsrq_safe(MSR_HWP_STATUS, 0);
+ raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
}
static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
{
- unsigned long flags;
+ bool cancel_work;
- if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+ if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY))
return;
- /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+ /* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+
+ raw_spin_lock_irq(&hwp_notify_lock);
+ cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
+ raw_spin_unlock_irq(&hwp_notify_lock);
- spin_lock_irqsave(&hwp_notify_lock, flags);
- if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask))
- cancel_delayed_work(&cpudata->hwp_notify_work);
- spin_unlock_irqrestore(&hwp_notify_lock, flags);
+ if (cancel_work)
+ cancel_delayed_work_sync(&cpudata->hwp_notify_work);
}
+#define HWP_GUARANTEED_PERF_CHANGE_REQ BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_REQ BIT(2)
+
static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
{
- /* Enable HWP notification interrupt for guaranteed performance change */
+ /* Enable HWP notification interrupt for performance change */
if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
- unsigned long flags;
+ u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
- spin_lock_irqsave(&hwp_notify_lock, flags);
+ raw_spin_lock_irq(&hwp_notify_lock);
INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
- spin_unlock_irqrestore(&hwp_notify_lock, flags);
+ raw_spin_unlock_irq(&hwp_notify_lock);
+
+ if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+ interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
- /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+ /* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
}
}
@@ -1748,9 +2096,9 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt till we activate again */
if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
- wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+ wrmsrq_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
intel_pstate_enable_hwp_interrupt(cpudata);
@@ -1764,7 +2112,7 @@ static int atom_get_min_pstate(int not_used)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ rdmsrq(MSR_ATOM_CORE_RATIOS, value);
return (value >> 8) & 0x7F;
}
@@ -1772,7 +2120,7 @@ static int atom_get_max_pstate(int not_used)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ rdmsrq(MSR_ATOM_CORE_RATIOS, value);
return (value >> 16) & 0x7F;
}
@@ -1780,7 +2128,7 @@ static int atom_get_turbo_pstate(int not_used)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
+ rdmsrq(MSR_ATOM_CORE_TURBO_RATIOS, value);
return value & 0x7F;
}
@@ -1791,7 +2139,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
u32 vid;
val = (u64)pstate << 8;
- if (global.no_turbo && !global.turbo_disabled)
+ if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled))
val |= (u64)1 << 32;
vid_fp = cpudata->vid.min + mul_fp(
@@ -1815,7 +2163,7 @@ static int silvermont_get_scaling(void)
static int silvermont_freq_table[] = {
83300, 100000, 133300, 116700, 80000};
- rdmsrl(MSR_FSB_FREQ, value);
+ rdmsrq(MSR_FSB_FREQ, value);
i = value & 0x7;
WARN_ON(i > 4);
@@ -1831,7 +2179,7 @@ static int airmont_get_scaling(void)
83300, 100000, 133300, 116700, 80000,
93300, 90000, 88900, 87500};
- rdmsrl(MSR_FSB_FREQ, value);
+ rdmsrq(MSR_FSB_FREQ, value);
i = value & 0xF;
WARN_ON(i > 8);
@@ -1842,7 +2190,7 @@ static void atom_get_vid(struct cpudata *cpudata)
{
u64 value;
- rdmsrl(MSR_ATOM_CORE_VIDS, value);
+ rdmsrq(MSR_ATOM_CORE_VIDS, value);
cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
cpudata->vid.ratio = div_fp(
@@ -1850,7 +2198,7 @@ static void atom_get_vid(struct cpudata *cpudata)
int_tofp(cpudata->pstate.max_pstate -
cpudata->pstate.min_pstate));
- rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
+ rdmsrq(MSR_ATOM_CORE_TURBO_VIDS, value);
cpudata->vid.turbo = value & 0x7f;
}
@@ -1858,7 +2206,7 @@ static int core_get_min_pstate(int cpu)
{
u64 value;
- rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+ rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
return (value >> 40) & 0xFF;
}
@@ -1866,7 +2214,7 @@ static int core_get_max_pstate_physical(int cpu)
{
u64 value;
- rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+ rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
return (value >> 8) & 0xFF;
}
@@ -1880,13 +2228,13 @@ static int core_get_tdp_ratio(int cpu, u64 plat_info)
int err;
/* Get the TDP level (0, 1, 2) to get ratios */
- err = rdmsrl_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+ err = rdmsrq_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
if (err)
return err;
/* TDP MSR are continuous starting at 0x648 */
tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
- err = rdmsrl_safe_on_cpu(cpu, tdp_msr, &tdp_ratio);
+ err = rdmsrq_safe_on_cpu(cpu, tdp_msr, &tdp_ratio);
if (err)
return err;
@@ -1911,7 +2259,7 @@ static int core_get_max_pstate(int cpu)
int tdp_ratio;
int err;
- rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
+ rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
max_pstate = (plat_info >> 8) & 0xFF;
tdp_ratio = core_get_tdp_ratio(cpu, plat_info);
@@ -1923,7 +2271,7 @@ static int core_get_max_pstate(int cpu)
return tdp_ratio;
}
- err = rdmsrl_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar);
+ err = rdmsrq_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar);
if (!err) {
int tar_levels;
@@ -1943,7 +2291,7 @@ static int core_get_turbo_pstate(int cpu)
u64 value;
int nont, ret;
- rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
+ rdmsrq_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
nont = core_get_max_pstate(cpu);
ret = (value) & 255;
if (ret <= nont)
@@ -1956,7 +2304,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
u64 val;
val = (u64)pstate << 8;
- if (global.no_turbo && !global.turbo_disabled)
+ if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled))
val |= (u64)1 << 32;
return val;
@@ -1972,7 +2320,7 @@ static int knl_get_turbo_pstate(int cpu)
u64 value;
int nont, ret;
- rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
+ rdmsrq_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
nont = core_get_max_pstate(cpu);
ret = (((value) >> 8) & 0xFF);
if (ret <= nont)
@@ -1980,33 +2328,31 @@ static int knl_get_turbo_pstate(int cpu)
return ret;
}
-static void hybrid_get_type(void *data)
-{
- u8 *cpu_type = data;
-
- *cpu_type = get_this_hybrid_cpu_type();
-}
-
static int hwp_get_cpu_scaling(int cpu)
{
- u8 cpu_type = 0;
+ if (hybrid_scaling_factor) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u8 cpu_type = c->topo.intel_type;
+
+ /*
+ * Return the hybrid scaling factor for P-cores and use the
+ * default core scaling for E-cores.
+ */
+ if (cpu_type == INTEL_CPU_TYPE_CORE)
+ return hybrid_scaling_factor;
- smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
- /* P-cores have a smaller perf level-to-frequency scaling factor. */
- if (cpu_type == 0x40)
- return hybrid_scaling_factor;
+ if (cpu_type == INTEL_CPU_TYPE_ATOM)
+ return core_get_scaling();
+ }
- /* Use default core scaling for E-cores */
- if (cpu_type == 0x20)
+ /* Use core scaling on non-hybrid systems. */
+ if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
return core_get_scaling();
/*
- * If reached here, this system is either non-hybrid (like Tiger
- * Lake) or hybrid-capable (like Alder Lake or Raptor Lake) with
- * no E cores (in which case CPUID for hybrid support is 0).
- *
- * The CPPC nominal_frequency field is 0 for non-hybrid systems,
- * so the default core scaling will be used for them.
+ * The system is hybrid, but the hybrid scaling factor is not known or
+ * the CPU type is not one of the above, so use CPPC to compute the
+ * scaling factor for this CPU.
*/
return intel_pstate_cppc_get_scaling(cpu);
}
@@ -2020,7 +2366,7 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
* the CPU being updated, so force the register update to run on the
* right CPU.
*/
- wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+ wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, pstate));
}
@@ -2029,14 +2375,6 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}
-static void intel_pstate_max_within_limits(struct cpudata *cpu)
-{
- int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
-
- update_turbo_state();
- intel_pstate_set_pstate(cpu, pstate);
-}
-
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu);
@@ -2051,11 +2389,18 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
if (pstate_funcs.get_cpu_scaling) {
cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
- if (cpu->pstate.scaling != perf_ctl_scaling)
+ if (cpu->pstate.scaling != perf_ctl_scaling) {
intel_pstate_hybrid_hwp_adjust(cpu);
+ hwp_is_hybrid = true;
+ }
} else {
cpu->pstate.scaling = perf_ctl_scaling;
}
+ /*
+ * If the CPU is going online for the first time and it was
+ * offline initially, asym capacity scaling needs to be updated.
+ */
+ hybrid_update_capacity(cpu);
} else {
cpu->pstate.scaling = perf_ctl_scaling;
cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu);
@@ -2128,7 +2473,7 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
return;
hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
- wrmsrl(MSR_HWP_REQUEST, hwp_req);
+ wrmsrq(MSR_HWP_REQUEST, hwp_req);
cpu->last_update = cpu->sample.time;
}
@@ -2141,7 +2486,7 @@ static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
expired = time_after64(cpu->sample.time, cpu->last_update +
hwp_boost_hold_time_ns);
if (expired) {
- wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+ wrmsrq(MSR_HWP_REQUEST, cpu->hwp_req_cached);
cpu->hwp_boost_min = 0;
}
}
@@ -2202,8 +2547,8 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
u64 tsc;
local_irq_save(flags);
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
+ rdmsrq(MSR_IA32_APERF, aperf);
+ rdmsrq(MSR_IA32_MPERF, mperf);
tsc = rdtsc();
if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
local_irq_restore(flags);
@@ -2262,7 +2607,7 @@ static inline int32_t get_target_pstate(struct cpudata *cpu)
sample->busy_scaled = busy_frac * 100;
- target = global.no_turbo || global.turbo_disabled ?
+ target = READ_ONCE(global.no_turbo) ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
target += target >> 2;
target = mul_fp(target, busy_frac);
@@ -2297,7 +2642,7 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
return;
cpu->pstate.current_pstate = pstate;
- wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+ wrmsrq(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}
static void intel_pstate_adjust_pstate(struct cpudata *cpu)
@@ -2306,8 +2651,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu)
struct sample *sample;
int target_pstate;
- update_turbo_state();
-
target_pstate = get_target_pstate(cpu);
target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
@@ -2402,52 +2745,58 @@ static const struct pstate_funcs knl_funcs = {
.get_val = core_get_val,
};
-#define X86_MATCH(model, policy) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
- X86_FEATURE_APERFMPERF, &policy)
+#define X86_MATCH(vfm, policy) \
+ X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_APERFMPERF, &policy)
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
- X86_MATCH(SANDYBRIDGE, core_funcs),
- X86_MATCH(SANDYBRIDGE_X, core_funcs),
- X86_MATCH(ATOM_SILVERMONT, silvermont_funcs),
- X86_MATCH(IVYBRIDGE, core_funcs),
- X86_MATCH(HASWELL, core_funcs),
- X86_MATCH(BROADWELL, core_funcs),
- X86_MATCH(IVYBRIDGE_X, core_funcs),
- X86_MATCH(HASWELL_X, core_funcs),
- X86_MATCH(HASWELL_L, core_funcs),
- X86_MATCH(HASWELL_G, core_funcs),
- X86_MATCH(BROADWELL_G, core_funcs),
- X86_MATCH(ATOM_AIRMONT, airmont_funcs),
- X86_MATCH(SKYLAKE_L, core_funcs),
- X86_MATCH(BROADWELL_X, core_funcs),
- X86_MATCH(SKYLAKE, core_funcs),
- X86_MATCH(BROADWELL_D, core_funcs),
- X86_MATCH(XEON_PHI_KNL, knl_funcs),
- X86_MATCH(XEON_PHI_KNM, knl_funcs),
- X86_MATCH(ATOM_GOLDMONT, core_funcs),
- X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs),
- X86_MATCH(SKYLAKE_X, core_funcs),
- X86_MATCH(COMETLAKE, core_funcs),
- X86_MATCH(ICELAKE_X, core_funcs),
- X86_MATCH(TIGERLAKE, core_funcs),
- X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
- X86_MATCH(EMERALDRAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_SANDYBRIDGE, core_funcs),
+ X86_MATCH(INTEL_SANDYBRIDGE_X, core_funcs),
+ X86_MATCH(INTEL_ATOM_SILVERMONT, silvermont_funcs),
+ X86_MATCH(INTEL_IVYBRIDGE, core_funcs),
+ X86_MATCH(INTEL_HASWELL, core_funcs),
+ X86_MATCH(INTEL_BROADWELL, core_funcs),
+ X86_MATCH(INTEL_IVYBRIDGE_X, core_funcs),
+ X86_MATCH(INTEL_HASWELL_X, core_funcs),
+ X86_MATCH(INTEL_HASWELL_L, core_funcs),
+ X86_MATCH(INTEL_HASWELL_G, core_funcs),
+ X86_MATCH(INTEL_BROADWELL_G, core_funcs),
+ X86_MATCH(INTEL_ATOM_AIRMONT, airmont_funcs),
+ X86_MATCH(INTEL_SKYLAKE_L, core_funcs),
+ X86_MATCH(INTEL_BROADWELL_X, core_funcs),
+ X86_MATCH(INTEL_SKYLAKE, core_funcs),
+ X86_MATCH(INTEL_BROADWELL_D, core_funcs),
+ X86_MATCH(INTEL_XEON_PHI_KNL, knl_funcs),
+ X86_MATCH(INTEL_XEON_PHI_KNM, knl_funcs),
+ X86_MATCH(INTEL_ATOM_GOLDMONT, core_funcs),
+ X86_MATCH(INTEL_ATOM_GOLDMONT_PLUS, core_funcs),
+ X86_MATCH(INTEL_SKYLAKE_X, core_funcs),
+ X86_MATCH(INTEL_COMETLAKE, core_funcs),
+ X86_MATCH(INTEL_ICELAKE_X, core_funcs),
+ X86_MATCH(INTEL_TIGERLAKE, core_funcs),
+ X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
+#ifdef CONFIG_ACPI
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
- X86_MATCH(BROADWELL_D, core_funcs),
- X86_MATCH(BROADWELL_X, core_funcs),
- X86_MATCH(SKYLAKE_X, core_funcs),
- X86_MATCH(ICELAKE_X, core_funcs),
- X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_BROADWELL_D, core_funcs),
+ X86_MATCH(INTEL_BROADWELL_X, core_funcs),
+ X86_MATCH(INTEL_SKYLAKE_X, core_funcs),
+ X86_MATCH(INTEL_ICELAKE_X, core_funcs),
+ X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs),
+ X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
+ X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
{}
};
+#endif
static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
- X86_MATCH(KABYLAKE, core_funcs),
+ X86_MATCH(INTEL_KABYLAKE, core_funcs),
{}
};
@@ -2484,7 +2833,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
}
cpu->epp_powersave = -EINVAL;
- cpu->epp_policy = 0;
+ cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
intel_pstate_get_cpu_pstates(cpu);
@@ -2526,7 +2875,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
static int intel_pstate_get_max_freq(struct cpudata *cpu)
{
- return global.turbo_disabled || global.no_turbo ?
+ return READ_ONCE(global.no_turbo) ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
}
@@ -2611,12 +2960,14 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
intel_pstate_update_perf_limits(cpu, policy->min, policy->max);
if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
+
/*
* NOHZ_FULL CPUs need this as the governor callback may not
* be invoked on them.
*/
intel_pstate_clear_update_util_hook(policy->cpu);
- intel_pstate_max_within_limits(cpu);
+ intel_pstate_set_pstate(cpu, pstate);
} else {
intel_pstate_set_update_util_hook(policy->cpu);
}
@@ -2659,10 +3010,9 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
{
int max_freq;
- update_turbo_state();
if (hwp_active) {
intel_pstate_get_hwp_cap(cpu);
- max_freq = global.no_turbo || global.turbo_disabled ?
+ max_freq = READ_ONCE(global.no_turbo) ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
} else {
max_freq = intel_pstate_get_max_freq(cpu);
@@ -2719,6 +3069,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
*/
intel_pstate_hwp_reenable(cpu);
cpu->suspended = false;
+
+ hybrid_update_capacity(cpu);
}
return 0;
@@ -2731,13 +3083,11 @@ static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
return intel_cpufreq_cpu_offline(policy);
}
-static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+static void intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
pr_debug("CPU %d exiting\n", policy->cpu);
policy->fast_switch_possible = false;
-
- return 0;
}
static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -2756,9 +3106,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_freq;
- update_turbo_state();
- global.turbo_disabled_mf = global.turbo_disabled;
- policy->cpuinfo.max_freq = global.turbo_disabled ?
+ policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
policy->min = policy->cpuinfo.min_freq;
@@ -2874,19 +3222,19 @@ static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,
WRITE_ONCE(cpu->hwp_req_cached, value);
if (fast_switch)
- wrmsrl(MSR_HWP_REQUEST, value);
+ wrmsrq(MSR_HWP_REQUEST, value);
else
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}
static void intel_cpufreq_perf_ctl_update(struct cpudata *cpu,
u32 target_pstate, bool fast_switch)
{
if (fast_switch)
- wrmsrl(MSR_IA32_PERF_CTL,
+ wrmsrq(MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, target_pstate));
else
- wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+ wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, target_pstate));
}
@@ -2923,8 +3271,6 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
struct cpufreq_freqs freqs;
int target_pstate;
- update_turbo_state();
-
freqs.old = policy->cur;
freqs.new = target_freq;
@@ -2946,8 +3292,6 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
struct cpudata *cpu = all_cpu_data[policy->cpu];
int target_pstate;
- update_turbo_state();
-
target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq);
target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
@@ -2965,9 +3309,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
int old_pstate = cpu->pstate.current_pstate;
int cap_pstate, min_pstate, max_pstate, target_pstate;
- update_turbo_state();
- cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) :
- HWP_HIGHEST_PERF(hwp_cap);
+ cap_pstate = READ_ONCE(global.no_turbo) ?
+ HWP_GUARANTEED_PERF(hwp_cap) :
+ HWP_HIGHEST_PERF(hwp_cap);
/* Optimization: Avoid unnecessary divisions. */
@@ -3034,7 +3378,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
intel_pstate_get_hwp_cap(cpu);
- rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
+ rdmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
WRITE_ONCE(cpu->hwp_req_cached, value);
cpu->epp_cached = intel_pstate_get_epp(cpu, value);
@@ -3074,7 +3418,7 @@ pstate_exit:
return ret;
}
-static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+static void intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
struct freq_qos_request *req;
@@ -3084,7 +3428,7 @@ static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
freq_qos_remove_request(req);
kfree(req);
- return intel_pstate_cpu_exit(policy);
+ intel_pstate_cpu_exit(policy);
}
static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
@@ -3101,7 +3445,7 @@ static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
* written by it may not be suitable.
*/
value &= ~HWP_DESIRED_PERF(~0L);
- wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
WRITE_ONCE(cpu->hwp_req_cached, value);
}
@@ -3135,10 +3479,8 @@ static void intel_pstate_driver_cleanup(void)
if (intel_pstate_driver == &intel_pstate)
intel_pstate_clear_update_util_hook(cpu);
- spin_lock(&hwp_notify_lock);
kfree(all_cpu_data[cpu]);
WRITE_ONCE(all_cpu_data[cpu], NULL);
- spin_unlock(&hwp_notify_lock);
}
}
cpus_read_unlock();
@@ -3148,6 +3490,7 @@ static void intel_pstate_driver_cleanup(void)
static int intel_pstate_register_driver(struct cpufreq_driver *driver)
{
+ bool refresh_cpu_cap_scaling;
int ret;
if (driver == &intel_pstate)
@@ -3155,6 +3498,12 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
memset(&global, 0, sizeof(global));
global.max_perf_pct = 100;
+ global.turbo_disabled = turbo_is_disabled();
+ global.no_turbo = global.turbo_disabled;
+
+ arch_set_max_freq_ratio(global.turbo_disabled);
+
+ refresh_cpu_cap_scaling = hybrid_clear_max_perf_cpu();
intel_pstate_driver = driver;
ret = cpufreq_register_driver(intel_pstate_driver);
@@ -3165,6 +3514,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
global.min_perf_pct = min_perf_pct_min();
+ hybrid_init_cpu_capacity_scaling(refresh_cpu_cap_scaling);
+
return 0;
}
@@ -3344,7 +3695,7 @@ static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
id = x86_match_cpu(intel_pstate_cpu_oob_ids);
if (id) {
- rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
+ rdmsrq(MSR_MISC_PWR_MGMT, misc_pwr);
if (misc_pwr & BITMASK_OOB) {
pr_debug("Bit 8 or 18 in the MISC_PWR_MGMT MSR set\n");
pr_debug("P states are controlled in Out of Band mode by the firmware/hardware\n");
@@ -3386,14 +3737,13 @@ static inline void intel_pstate_request_control_from_smm(void) {}
#define INTEL_PSTATE_HWP_BROADWELL 0x01
-#define X86_MATCH_HWP(model, hwp_mode) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
- X86_FEATURE_HWP, hwp_mode)
+#define X86_MATCH_HWP(vfm, hwp_mode) \
+ X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_HWP, hwp_mode)
static const struct x86_cpu_id hwp_support_ids[] __initconst = {
- X86_MATCH_HWP(BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
- X86_MATCH_HWP(BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL),
- X86_MATCH_HWP(ANY, 0),
+ X86_MATCH_HWP(INTEL_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
+ X86_MATCH_HWP(INTEL_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL),
+ X86_MATCH_HWP(INTEL_ANY, 0),
{}
};
@@ -3401,7 +3751,7 @@ static bool intel_pstate_hwp_is_enabled(void)
{
u64 value;
- rdmsrl(MSR_PM_ENABLE, value);
+ rdmsrq(MSR_PM_ENABLE, value);
return !!(value & 0x1);
}
@@ -3426,15 +3776,26 @@ static const struct x86_cpu_id intel_epp_default[] = {
* which can result in one core turbo frequency for
* AlderLake Mobile CPUs.
*/
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
- HWP_EPP_BALANCE_POWERSAVE, 115, 16)),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+ 179, 64, 16)),
+ X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+ 179, 64, 16)),
{}
};
static const struct x86_cpu_id intel_hybrid_scaling_factor[] = {
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL),
+ X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL),
{}
};
@@ -3447,6 +3808,15 @@ static int __init intel_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
+ /*
+ * The Intel pstate driver will be ignored if the platform
+ * firmware has its own power management modes.
+ */
+ if (intel_pstate_platform_pwr_mgmt_exists()) {
+ pr_info("P-states controlled by the platform\n");
+ return -ENODEV;
+ }
+
id = x86_match_cpu(hwp_support_ids);
if (id) {
hwp_forced = intel_pstate_hwp_is_enabled();
@@ -3466,7 +3836,7 @@ static int __init intel_pstate_init(void)
* deal with it.
*/
if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) {
- WRITE_ONCE(hwp_active, 1);
+ hwp_active = true;
hwp_mode_bdw = id->driver_data;
intel_pstate.attr = hwp_cpufreq_attrs;
intel_cpufreq.attr = hwp_cpufreq_attrs;
@@ -3502,15 +3872,6 @@ static int __init intel_pstate_init(void)
default_driver = &intel_cpufreq;
hwp_cpu_matched:
- /*
- * The Intel pstate driver will be ignored if the platform
- * firmware has its own power management modes.
- */
- if (intel_pstate_platform_pwr_mgmt_exists()) {
- pr_info("P-states controlled by the platform\n");
- return -ENODEV;
- }
-
if (!hwp_active && hwp_only)
return -ENOTSUPP;
@@ -3594,6 +3955,9 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
+ if (!strcmp(str, "no_cas"))
+ no_cas = true;
+
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))