Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r--	drivers/cpufreq/intel_pstate.c	| 679
1 file changed, 474 insertions(+), 205 deletions(-)
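Note on the EPP default handling added further down: the patch packs four 8-bit energy-performance-preference defaults into a single u32 driver_data value with GENMASK()/FIELD_PREP_CONST() and unpacks them later with FIELD_GET(). The standalone sketch below only illustrates that bit layout; it uses plain shifts and masks instead of the kernel's bitfield helpers, and pack_epp()/unpack_epp() plus the sample values are hypothetical stand-ins, not code from the patch.

#include <stdint.h>
#include <stdio.h>

/* Same layout as the patch: bits 7:0, 15:8, 23:16 and 31:24 of a u32. */
#define POWERSAVE_SHIFT			0
#define BALANCE_POWER_SHIFT		8
#define BALANCE_PERFORMANCE_SHIFT	16
#define PERFORMANCE_SHIFT		24

/* Hypothetical stand-in for HWP_SET_EPP_VALUES()/FIELD_PREP_CONST(). */
static uint32_t pack_epp(uint8_t powersave, uint8_t balance_power,
			 uint8_t balance_perf, uint8_t performance)
{
	return ((uint32_t)powersave << POWERSAVE_SHIFT) |
	       ((uint32_t)balance_power << BALANCE_POWER_SHIFT) |
	       ((uint32_t)balance_perf << BALANCE_PERFORMANCE_SHIFT) |
	       ((uint32_t)performance << PERFORMANCE_SHIFT);
}

/* Hypothetical stand-in for FIELD_GET() on one 8-bit field. */
static unsigned int unpack_epp(uint32_t packed, unsigned int shift)
{
	return (packed >> shift) & 0xff;
}

int main(void)
{
	/* Sample values mirroring the INTEL_METEORLAKE_L entry in the patch
	 * (HWP_EPP_POWERSAVE, i.e. 255, then 179, 64 and 16). */
	uint32_t packed = pack_epp(255, 179, 64, 16);

	printf("powersave:%u balance_power:%u balance_perf:%u performance:%u\n",
	       unpack_epp(packed, POWERSAVE_SHIFT),
	       unpack_epp(packed, BALANCE_POWER_SHIFT),
	       unpack_epp(packed, BALANCE_PERFORMANCE_SHIFT),
	       unpack_epp(packed, PERFORMANCE_SHIFT));
	return 0;
}

Built with any C compiler, this prints the same four fields that intel_pstate_init() in the patch logs via pr_debug() after matching an intel_epp_default entry.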
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 79619227ea51..9c4cc01fd51a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -16,6 +16,7 @@
 #include <linux/tick.h>
 #include <linux/slab.h>
 #include <linux/sched/cpufreq.h>
+#include <linux/sched/smt.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
@@ -25,7 +26,9 @@
 #include <linux/acpi.h>
 #include <linux/vmalloc.h>
 #include <linux/pm_qos.h>
+#include <linux/bitfield.h>
 #include <trace/events/power.h>
+#include <linux/units.h>
 
 #include <asm/cpu.h>
 #include <asm/div64.h>
@@ -172,7 +175,6 @@ struct vid_data {
  *			based on the MSR_IA32_MISC_ENABLE value and whether or
  *			not the maximum reported turbo P-state is different from
  *			the maximum reported non-turbo one.
- * @turbo_disabled_mf:	The @turbo_disabled value reflected by cpuinfo.max_freq.
  * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
  *			P-state capacity.
  * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
@@ -181,7 +183,6 @@ struct vid_data {
 struct global_params {
 	bool no_turbo;
 	bool turbo_disabled;
-	bool turbo_disabled_mf;
 	int max_perf_pct;
 	int min_perf_pct;
 };
@@ -201,8 +202,6 @@ struct global_params {
  * @prev_aperf:		Last APERF value read from APERF MSR
  * @prev_mperf:		Last MPERF value read from MPERF MSR
  * @prev_tsc:		Last timestamp counter (TSC) value
- * @prev_cummulative_iowait: IO Wait time difference from last and
- *			current sample
  * @sample:		Storage for storing last Sample data
  * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
  * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
@@ -214,10 +213,11 @@ struct global_params {
  * @epp_policy:		Last saved policy used to set EPP/EPB
  * @epp_default:	Power on default HWP energy performance
  *			preference/bias
- * @epp_cached		Cached HWP energy-performance preference value
+ * @epp_cached:		Cached HWP energy-performance preference value
  * @hwp_req_cached:	Cached value of the last HWP Request MSR
  * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
 * @last_io_update:	Last time when IO wake flag was set
+ * @capacity_perf:	Highest perf used for scale invariance
 * @sched_flags:	Store scheduler flags for possible cross CPU update
 * @hwp_boost_min:	Last HWP boosted min performance
 * @suspended:		Whether or not the driver has been suspended.
@@ -241,7 +241,6 @@ struct cpudata {
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
-	u64	prev_cummulative_iowait;
 	struct sample sample;
 	int32_t	min_perf_ratio;
 	int32_t	max_perf_ratio;
@@ -257,6 +256,7 @@ struct cpudata {
 	u64 hwp_req_cached;
 	u64 hwp_cap_cached;
 	u64 last_io_update;
+	unsigned int capacity_perf;
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
@@ -294,18 +294,20 @@ struct pstate_funcs {
 
 static struct pstate_funcs pstate_funcs __read_mostly;
 
-static int hwp_active __read_mostly;
-static int hwp_mode_bdw __read_mostly;
-static bool per_cpu_limits __read_mostly;
+static bool hwp_active __ro_after_init;
+static int hwp_mode_bdw __ro_after_init;
+static bool per_cpu_limits __ro_after_init;
+static bool hwp_forced __ro_after_init;
 static bool hwp_boost __read_mostly;
-static bool hwp_forced __read_mostly;
+static bool hwp_is_hybrid;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
-#define HYBRID_SCALING_FACTOR		78741
+#define HYBRID_SCALING_FACTOR_ADL	78741
 #define HYBRID_SCALING_FACTOR_MTL	80000
+#define HYBRID_SCALING_FACTOR_LNL	86957
 
-static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR;
+static int hybrid_scaling_factor;
 
 static inline int core_get_scaling(void)
 {
@@ -359,15 +361,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	int ret;
 
 	ret = cppc_get_perf_caps(cpu, &cppc_perf);
-	if (ret)
-		return;
-
 	/*
-	 * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
-	 * In this case we can't use CPPC.highest_perf to enable ITMT.
-	 * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
+	 * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0].
+	 *
+	 * Also, on some systems with overclocking enabled, CPPC.highest_perf is
+	 * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT.
+	 * Fall back to MSR_HWP_CAPABILITIES then too.
 	 */
-	if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+	if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF)
 		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
 
 	/*
@@ -414,18 +415,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
 static int intel_pstate_cppc_get_scaling(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
-	int ret;
-
-	ret = cppc_get_perf_caps(cpu, &cppc_perf);
 
 	/*
-	 * If the nominal frequency and the nominal performance are not
-	 * zero and the ratio between them is not 100, return the hybrid
-	 * scaling factor.
+	 * Compute the perf-to-frequency scaling factor for the given CPU if
+	 * possible, unless it would be 0.
 	 */
-	if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq &&
-	    cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq)
-		return hybrid_scaling_factor;
+	if (!cppc_get_perf_caps(cpu, &cppc_perf) &&
+	    cppc_perf.nominal_perf && cppc_perf.nominal_freq)
+		return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ,
+			       cppc_perf.nominal_perf);
 
 	return core_get_scaling();
 }
@@ -596,12 +594,13 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
 	cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
 }
 
-static inline void update_turbo_state(void)
+static bool turbo_is_disabled(void)
 {
 	u64 misc_en;
 
 	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-	global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+
+	return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
 }
 
 static int min_perf_pct_min(void)
 {
@@ -937,6 +936,148 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
+static struct cpudata *hybrid_max_perf_cpu __read_mostly;
+/*
+ * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
+ * and the x86 arch scale-invariance information from concurrent updates.
+ */
+static DEFINE_MUTEX(hybrid_capacity_lock);
+
+static void hybrid_set_cpu_capacity(struct cpudata *cpu)
+{
+	arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
+			      hybrid_max_perf_cpu->capacity_perf,
+			      cpu->capacity_perf,
+			      cpu->pstate.max_pstate_physical);
+
+	pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
+		 cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
+		 cpu->pstate.max_pstate_physical);
+}
+
+static void hybrid_clear_cpu_capacity(unsigned int cpunum)
+{
+	arch_set_cpu_capacity(cpunum, 1, 1, 1, 1);
+}
+
+static void hybrid_get_capacity_perf(struct cpudata *cpu)
+{
+	if (READ_ONCE(global.no_turbo)) {
+		cpu->capacity_perf = cpu->pstate.max_pstate_physical;
+		return;
+	}
+
+	cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
+}
+
+static void hybrid_set_capacity_of_cpus(void)
+{
+	int cpunum;
+
+	for_each_online_cpu(cpunum) {
+		struct cpudata *cpu = all_cpu_data[cpunum];
+
+		if (cpu)
+			hybrid_set_cpu_capacity(cpu);
+	}
+}
+
+static void hybrid_update_cpu_capacity_scaling(void)
+{
+	struct cpudata *max_perf_cpu = NULL;
+	unsigned int max_cap_perf = 0;
+	int cpunum;
+
+	for_each_online_cpu(cpunum) {
+		struct cpudata *cpu = all_cpu_data[cpunum];
+
+		if (!cpu)
+			continue;
+
+		/*
+		 * During initialization, CPU performance at full capacity needs
+		 * to be determined.
+		 */
+		if (!hybrid_max_perf_cpu)
+			hybrid_get_capacity_perf(cpu);
+
+		/*
+		 * If hybrid_max_perf_cpu is not NULL at this point, it is
+		 * being replaced, so don't take it into account when looking
+		 * for the new one.
+		 */
+		if (cpu == hybrid_max_perf_cpu)
+			continue;
+
+		if (cpu->capacity_perf > max_cap_perf) {
+			max_cap_perf = cpu->capacity_perf;
+			max_perf_cpu = cpu;
+		}
+	}
+
+	if (max_perf_cpu) {
+		hybrid_max_perf_cpu = max_perf_cpu;
+		hybrid_set_capacity_of_cpus();
+	} else {
+		pr_info("Found no CPUs with nonzero maximum performance\n");
+		/* Revert to the flat CPU capacity structure. */
+		for_each_online_cpu(cpunum)
+			hybrid_clear_cpu_capacity(cpunum);
+	}
+}
+
+static void __hybrid_refresh_cpu_capacity_scaling(void)
+{
+	hybrid_max_perf_cpu = NULL;
+	hybrid_update_cpu_capacity_scaling();
+}
+
+static void hybrid_refresh_cpu_capacity_scaling(void)
+{
+	guard(mutex)(&hybrid_capacity_lock);
+
+	__hybrid_refresh_cpu_capacity_scaling();
+}
+
+static void hybrid_init_cpu_capacity_scaling(bool refresh)
+{
+	/*
+	 * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
+	 * scaling has been enabled already and the driver is just changing the
+	 * operation mode.
+	 */
+	if (refresh) {
+		hybrid_refresh_cpu_capacity_scaling();
+		return;
+	}
+
+	/*
+	 * On hybrid systems, use asym capacity instead of ITMT, but because
+	 * the capacity of SMT threads is not deterministic even approximately,
+	 * do not do that when SMT is in use.
+	 */
+	if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) {
+		hybrid_refresh_cpu_capacity_scaling();
+		/*
+		 * Disabling ITMT causes sched domains to be rebuilt to disable asym
+		 * packing and enable asym capacity.
+		 */
+		sched_clear_itmt_support();
+	}
+}
+
+static bool hybrid_clear_max_perf_cpu(void)
+{
+	bool ret;
+
+	guard(mutex)(&hybrid_capacity_lock);
+
+	ret = !!hybrid_max_perf_cpu;
+	hybrid_max_perf_cpu = NULL;
+
+	return ret;
+}
+
 static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
 {
 	u64 cap;
@@ -965,6 +1106,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
 	}
 }
 
+static void hybrid_update_capacity(struct cpudata *cpu)
+{
+	unsigned int max_cap_perf;
+
+	mutex_lock(&hybrid_capacity_lock);
+
+	if (!hybrid_max_perf_cpu)
+		goto unlock;
+
+	/*
+	 * The maximum performance of the CPU may have changed, but assume
+	 * that the performance of the other CPUs has not changed.
+	 */
+	max_cap_perf = hybrid_max_perf_cpu->capacity_perf;
+
+	intel_pstate_get_hwp_cap(cpu);
+
+	hybrid_get_capacity_perf(cpu);
+	/* Should hybrid_max_perf_cpu be replaced by this CPU? */
+	if (cpu->capacity_perf > max_cap_perf) {
+		hybrid_max_perf_cpu = cpu;
+		hybrid_set_capacity_of_cpus();
+		goto unlock;
+	}
+
+	/* If this CPU is hybrid_max_perf_cpu, should it be replaced? */
+	if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) {
+		hybrid_update_cpu_capacity_scaling();
+		goto unlock;
+	}
+
+	hybrid_set_cpu_capacity(cpu);
+
+unlock:
+	mutex_unlock(&hybrid_capacity_lock);
+}
+
 static void intel_pstate_hwp_set(unsigned int cpu)
 {
 	struct cpudata *cpu_data = all_cpu_data[cpu];
@@ -1073,6 +1251,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
 		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
 
 	wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+
+	mutex_lock(&hybrid_capacity_lock);
+
+	if (!hybrid_max_perf_cpu) {
+		mutex_unlock(&hybrid_capacity_lock);
+
+		return;
+	}
+
+	if (hybrid_max_perf_cpu == cpu)
+		hybrid_update_cpu_capacity_scaling();
+
+	mutex_unlock(&hybrid_capacity_lock);
+
+	/* Reset the capacity of the CPU going offline to the initial value. */
+	hybrid_clear_cpu_capacity(cpu->cpu);
 }
 
 #define POWER_CTL_EE_ENABLE		1
@@ -1156,42 +1350,58 @@ static void intel_pstate_update_policies(void)
 static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
 					   struct cpufreq_policy *policy)
 {
-	policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
+	if (hwp_active)
+		intel_pstate_get_hwp_cap(cpudata);
+
+	policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
 			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
+
 	refresh_frequency_limits(policy);
 }
 
-static void intel_pstate_update_max_freq(unsigned int cpu)
+static void intel_pstate_update_limits(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+	struct cpudata *cpudata;
 
 	if (!policy)
 		return;
 
-	__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+	cpudata = all_cpu_data[cpu];
+
+	__intel_pstate_update_max_freq(cpudata, policy);
+
+	/* Prevent the driver from being unregistered now. */
+	mutex_lock(&intel_pstate_driver_lock);
 
 	cpufreq_cpu_release(policy);
+
+	hybrid_update_capacity(cpudata);
+
+	mutex_unlock(&intel_pstate_driver_lock);
 }
 
-static void intel_pstate_update_limits(unsigned int cpu)
+static void intel_pstate_update_limits_for_all(void)
 {
-	mutex_lock(&intel_pstate_driver_lock);
+	int cpu;
 
-	update_turbo_state();
-	/*
-	 * If turbo has been turned on or off globally, policy limits for
-	 * all CPUs need to be updated to reflect that.
-	 */
-	if (global.turbo_disabled_mf != global.turbo_disabled) {
-		global.turbo_disabled_mf = global.turbo_disabled;
-		arch_set_max_freq_ratio(global.turbo_disabled);
-		for_each_possible_cpu(cpu)
-			intel_pstate_update_max_freq(cpu);
-	} else {
-		cpufreq_update_policy(cpu);
+	for_each_possible_cpu(cpu) {
+		struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+
+		if (!policy)
+			continue;
+
+		__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+
+		cpufreq_cpu_release(policy);
 	}
 
-	mutex_unlock(&intel_pstate_driver_lock);
+	mutex_lock(&hybrid_capacity_lock);
+
+	if (hybrid_max_perf_cpu)
+		__hybrid_refresh_cpu_capacity_scaling();
+
+	mutex_unlock(&hybrid_capacity_lock);
 }
 
 /************************** sysfs begin ************************/
@@ -1289,11 +1499,7 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 		return -EAGAIN;
 	}
 
-	update_turbo_state();
-	if (global.turbo_disabled)
-		ret = sprintf(buf, "%u\n", global.turbo_disabled);
-	else
-		ret = sprintf(buf, "%u\n", global.no_turbo);
+	ret = sprintf(buf, "%u\n", global.no_turbo);
 
 	mutex_unlock(&intel_pstate_driver_lock);
 
@@ -1304,32 +1510,39 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
 			      const char *buf, size_t count)
 {
 	unsigned int input;
-	int ret;
+	bool no_turbo;
 
-	ret = sscanf(buf, "%u", &input);
-	if (ret != 1)
+	if (sscanf(buf, "%u", &input) != 1)
 		return -EINVAL;
 
 	mutex_lock(&intel_pstate_driver_lock);
 
 	if (!intel_pstate_driver) {
-		mutex_unlock(&intel_pstate_driver_lock);
-		return -EAGAIN;
+		count = -EAGAIN;
+		goto unlock_driver;
 	}
 
-	mutex_lock(&intel_pstate_limits_lock);
+	no_turbo = !!clamp_t(int, input, 0, 1);
 
-	update_turbo_state();
-	if (global.turbo_disabled) {
-		pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n");
-		mutex_unlock(&intel_pstate_limits_lock);
-		mutex_unlock(&intel_pstate_driver_lock);
-		return -EPERM;
+	WRITE_ONCE(global.turbo_disabled, turbo_is_disabled());
+	if (global.turbo_disabled && !no_turbo) {
+		pr_notice("Turbo disabled by BIOS or unavailable on processor\n");
+		count = -EPERM;
+		if (global.no_turbo)
+			goto unlock_driver;
+		else
+			no_turbo = 1;
 	}
 
-	global.no_turbo = clamp_t(int, input, 0, 1);
+	if (no_turbo == global.no_turbo) {
+		goto unlock_driver;
+	}
+
+	WRITE_ONCE(global.no_turbo, no_turbo);
+
+	mutex_lock(&intel_pstate_limits_lock);
 
-	if (global.no_turbo) {
+	if (no_turbo) {
 		struct cpudata *cpu = all_cpu_data[0];
 		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
@@ -1340,9 +1553,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
-	intel_pstate_update_policies();
-	arch_set_max_freq_ratio(global.no_turbo);
+	intel_pstate_update_limits_for_all();
+	arch_set_max_freq_ratio(no_turbo);
 
+unlock_driver:
 	mutex_unlock(&intel_pstate_driver_lock);
 
 	return count;
@@ -1623,94 +1837,98 @@ static void intel_pstate_notify_work(struct work_struct *work)
 	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
 
 	if (policy) {
-		intel_pstate_get_hwp_cap(cpudata);
 		__intel_pstate_update_max_freq(cpudata, policy);
 
 		cpufreq_cpu_release(policy);
+
+		/*
+		 * The driver will not be unregistered while this function is
+		 * running, so update the capacity without acquiring the driver
+		 * lock.
+		 */
+		hybrid_update_capacity(cpudata);
 	}
 
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 }
 
-static DEFINE_SPINLOCK(hwp_notify_lock);
+static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
 static cpumask_t hwp_intr_enable_mask;
 
+#define HWP_GUARANTEED_PERF_CHANGE_STATUS	BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_STATUS		BIT(3)
+
 void notify_hwp_interrupt(void)
 {
 	unsigned int this_cpu = smp_processor_id();
-	struct cpudata *cpudata;
+	u64 value, status_mask;
 	unsigned long flags;
-	u64 value;
 
-	if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+	if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY))
 		return;
 
+	status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS;
+	if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+		status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS;
+
 	rdmsrl_safe(MSR_HWP_STATUS, &value);
-	if (!(value & 0x01))
+	if (!(value & status_mask))
 		return;
 
-	spin_lock_irqsave(&hwp_notify_lock, flags);
+	raw_spin_lock_irqsave(&hwp_notify_lock, flags);
 
 	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
 		goto ack_intr;
 
-	/*
-	 * Currently we never free all_cpu_data. And we can't reach here
-	 * without this allocated. But for safety for future changes, added
-	 * check.
-	 */
-	if (unlikely(!READ_ONCE(all_cpu_data)))
-		goto ack_intr;
-
-	/*
-	 * The free is done during cleanup, when cpufreq registry is failed.
-	 * We wouldn't be here if it fails on init or switch status. But for
-	 * future changes, added check.
-	 */
-	cpudata = READ_ONCE(all_cpu_data[this_cpu]);
-	if (unlikely(!cpudata))
-		goto ack_intr;
-
-	schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10));
+	schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work,
+			      msecs_to_jiffies(10));
 
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
 
 	return;
 
 ack_intr:
 	wrmsrl_safe(MSR_HWP_STATUS, 0);
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
 }
 
 static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
 {
-	unsigned long flags;
+	bool cancel_work;
 
-	if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+	if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY))
 		return;
 
 	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
-	spin_lock_irqsave(&hwp_notify_lock, flags);
-	if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask))
-		cancel_delayed_work(&cpudata->hwp_notify_work);
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_lock_irq(&hwp_notify_lock);
+	cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
+	raw_spin_unlock_irq(&hwp_notify_lock);
+
+	if (cancel_work)
+		cancel_delayed_work_sync(&cpudata->hwp_notify_work);
 }
 
+#define HWP_GUARANTEED_PERF_CHANGE_REQ BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_REQ    BIT(2)
+
 static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 {
-	/* Enable HWP notification interrupt for guaranteed performance change */
+	/* Enable HWP notification interrupt for performance change */
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
-		unsigned long flags;
+		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
 
-		spin_lock_irqsave(&hwp_notify_lock, flags);
+		raw_spin_lock_irq(&hwp_notify_lock);
 		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
 		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
-		spin_unlock_irqrestore(&hwp_notify_lock, flags);
+		raw_spin_unlock_irq(&hwp_notify_lock);
+
+		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
 
 		/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
-		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
 		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 	}
 }
@@ -1793,7 +2011,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
 	u32 vid;
 
 	val = (u64)pstate << 8;
-	if (global.no_turbo && !global.turbo_disabled)
+	if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled))
 		val |= (u64)1 << 32;
 
 	vid_fp = cpudata->vid.min + mul_fp(
@@ -1958,7 +2176,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
 	u64 val;
 
 	val = (u64)pstate << 8;
-	if (global.no_turbo && !global.turbo_disabled)
+	if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled))
 		val |= (u64)1 << 32;
 
 	return val;
@@ -1991,24 +2209,30 @@ static void hybrid_get_type(void *data)
 
 static int hwp_get_cpu_scaling(int cpu)
 {
-	u8 cpu_type = 0;
+	if (hybrid_scaling_factor) {
+		u8 cpu_type = 0;
+
+		smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
+
+		/*
+		 * Return the hybrid scaling factor for P-cores and use the
+		 * default core scaling for E-cores.
+		 */
+		if (cpu_type == 0x40)
+			return hybrid_scaling_factor;
 
-	smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
-	/* P-cores have a smaller perf level-to-freqency scaling factor. */
-	if (cpu_type == 0x40)
-		return hybrid_scaling_factor;
+		if (cpu_type == 0x20)
+			return core_get_scaling();
+	}
 
-	/* Use default core scaling for E-cores */
-	if (cpu_type == 0x20)
+	/* Use core scaling on non-hybrid systems. */
+	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
 		return core_get_scaling();
 
 	/*
-	 * If reached here, this system is either non-hybrid (like Tiger
-	 * Lake) or hybrid-capable (like Alder Lake or Raptor Lake) with
-	 * no E cores (in which case CPUID for hybrid support is 0).
-	 *
-	 * The CPPC nominal_frequency field is 0 for non-hybrid systems,
-	 * so the default core scaling will be used for them.
+	 * The system is hybrid, but the hybrid scaling factor is not known or
+	 * the CPU type is not one of the above, so use CPPC to compute the
+	 * scaling factor for this CPU.
 	 */
 	return intel_pstate_cppc_get_scaling(cpu);
 }
@@ -2031,14 +2255,6 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
 	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
 }
 
-static void intel_pstate_max_within_limits(struct cpudata *cpu)
-{
-	int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
-
-	update_turbo_state();
-	intel_pstate_set_pstate(cpu, pstate);
-}
-
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
 	int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu);
@@ -2053,11 +2269,18 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 
 		if (pstate_funcs.get_cpu_scaling) {
 			cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
-			if (cpu->pstate.scaling != perf_ctl_scaling)
+			if (cpu->pstate.scaling != perf_ctl_scaling) {
 				intel_pstate_hybrid_hwp_adjust(cpu);
+				hwp_is_hybrid = true;
+			}
 		} else {
 			cpu->pstate.scaling = perf_ctl_scaling;
 		}
+		/*
+		 * If the CPU is going online for the first time and it was
+		 * offline initially, asym capacity scaling needs to be updated.
+		 */
+		hybrid_update_capacity(cpu);
 	} else {
 		cpu->pstate.scaling = perf_ctl_scaling;
 		cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu);
@@ -2264,7 +2487,7 @@ static inline int32_t get_target_pstate(struct cpudata *cpu)
 
 	sample->busy_scaled = busy_frac * 100;
 
-	target = global.no_turbo || global.turbo_disabled ?
+	target = READ_ONCE(global.no_turbo) ?
 			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
 	target += target >> 2;
 	target = mul_fp(target, busy_frac);
@@ -2308,8 +2531,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu)
 	struct sample *sample;
 	int target_pstate;
 
-	update_turbo_state();
-
 	target_pstate = get_target_pstate(cpu);
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
 	trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
@@ -2404,52 +2625,58 @@ static const struct pstate_funcs knl_funcs = {
 	.get_val = core_get_val,
 };
 
-#define X86_MATCH(model, policy)					 \
-	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
-					   X86_FEATURE_APERFMPERF, &policy)
+#define X86_MATCH(vfm, policy)					 \
+	X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_APERFMPERF, &policy)
 
 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
-	X86_MATCH(SANDYBRIDGE, core_funcs),
-	X86_MATCH(SANDYBRIDGE_X, core_funcs),
-	X86_MATCH(ATOM_SILVERMONT, silvermont_funcs),
-	X86_MATCH(IVYBRIDGE, core_funcs),
-	X86_MATCH(HASWELL, core_funcs),
-	X86_MATCH(BROADWELL, core_funcs),
-	X86_MATCH(IVYBRIDGE_X, core_funcs),
-	X86_MATCH(HASWELL_X, core_funcs),
-	X86_MATCH(HASWELL_L, core_funcs),
-	X86_MATCH(HASWELL_G, core_funcs),
-	X86_MATCH(BROADWELL_G, core_funcs),
-	X86_MATCH(ATOM_AIRMONT, airmont_funcs),
-	X86_MATCH(SKYLAKE_L, core_funcs),
-	X86_MATCH(BROADWELL_X, core_funcs),
-	X86_MATCH(SKYLAKE, core_funcs),
-	X86_MATCH(BROADWELL_D, core_funcs),
-	X86_MATCH(XEON_PHI_KNL, knl_funcs),
-	X86_MATCH(XEON_PHI_KNM, knl_funcs),
-	X86_MATCH(ATOM_GOLDMONT, core_funcs),
-	X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs),
-	X86_MATCH(SKYLAKE_X, core_funcs),
-	X86_MATCH(COMETLAKE, core_funcs),
-	X86_MATCH(ICELAKE_X, core_funcs),
-	X86_MATCH(TIGERLAKE, core_funcs),
-	X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
-	X86_MATCH(EMERALDRAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_SANDYBRIDGE, core_funcs),
+	X86_MATCH(INTEL_SANDYBRIDGE_X, core_funcs),
+	X86_MATCH(INTEL_ATOM_SILVERMONT, silvermont_funcs),
+	X86_MATCH(INTEL_IVYBRIDGE, core_funcs),
+	X86_MATCH(INTEL_HASWELL, core_funcs),
+	X86_MATCH(INTEL_BROADWELL, core_funcs),
+	X86_MATCH(INTEL_IVYBRIDGE_X, core_funcs),
+	X86_MATCH(INTEL_HASWELL_X, core_funcs),
+	X86_MATCH(INTEL_HASWELL_L, core_funcs),
+	X86_MATCH(INTEL_HASWELL_G, core_funcs),
+	X86_MATCH(INTEL_BROADWELL_G, core_funcs),
+	X86_MATCH(INTEL_ATOM_AIRMONT, airmont_funcs),
+	X86_MATCH(INTEL_SKYLAKE_L, core_funcs),
+	X86_MATCH(INTEL_BROADWELL_X, core_funcs),
+	X86_MATCH(INTEL_SKYLAKE, core_funcs),
+	X86_MATCH(INTEL_BROADWELL_D, core_funcs),
+	X86_MATCH(INTEL_XEON_PHI_KNL, knl_funcs),
+	X86_MATCH(INTEL_XEON_PHI_KNM, knl_funcs),
+	X86_MATCH(INTEL_ATOM_GOLDMONT, core_funcs),
+	X86_MATCH(INTEL_ATOM_GOLDMONT_PLUS, core_funcs),
+	X86_MATCH(INTEL_SKYLAKE_X, core_funcs),
+	X86_MATCH(INTEL_COMETLAKE, core_funcs),
+	X86_MATCH(INTEL_ICELAKE_X, core_funcs),
+	X86_MATCH(INTEL_TIGERLAKE, core_funcs),
+	X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
+#ifdef CONFIG_ACPI
 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
-	X86_MATCH(BROADWELL_D, core_funcs),
-	X86_MATCH(BROADWELL_X, core_funcs),
-	X86_MATCH(SKYLAKE_X, core_funcs),
-	X86_MATCH(ICELAKE_X, core_funcs),
-	X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_BROADWELL_D, core_funcs),
+	X86_MATCH(INTEL_BROADWELL_X, core_funcs),
+	X86_MATCH(INTEL_SKYLAKE_X, core_funcs),
+	X86_MATCH(INTEL_ICELAKE_X, core_funcs),
+	X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs),
+	X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
+	X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
 	{}
 };
+#endif
 
 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
-	X86_MATCH(KABYLAKE, core_funcs),
+	X86_MATCH(INTEL_KABYLAKE, core_funcs),
 	{}
 };
 
@@ -2486,7 +2713,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	}
 
 	cpu->epp_powersave = -EINVAL;
-	cpu->epp_policy = 0;
+	cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
 
 	intel_pstate_get_cpu_pstates(cpu);
 
@@ -2528,7 +2755,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 
 static int intel_pstate_get_max_freq(struct cpudata *cpu)
 {
-	return global.turbo_disabled || global.no_turbo ?
+	return READ_ONCE(global.no_turbo) ?
 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 }
 
@@ -2613,12 +2840,14 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	intel_pstate_update_perf_limits(cpu, policy->min, policy->max);
 
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
+
 		/*
 		 * NOHZ_FULL CPUs need this as the governor callback may not
 		 * be invoked on them.
 		 */
 		intel_pstate_clear_update_util_hook(policy->cpu);
-		intel_pstate_max_within_limits(cpu);
+		intel_pstate_set_pstate(cpu, pstate);
 	} else {
 		intel_pstate_set_update_util_hook(policy->cpu);
 	}
@@ -2661,10 +2890,9 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
 {
 	int max_freq;
 
-	update_turbo_state();
 	if (hwp_active) {
 		intel_pstate_get_hwp_cap(cpu);
-		max_freq = global.no_turbo || global.turbo_disabled ?
+		max_freq = READ_ONCE(global.no_turbo) ?
 				cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 	} else {
 		max_freq = intel_pstate_get_max_freq(cpu);
@@ -2721,6 +2949,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
 		 */
 		intel_pstate_hwp_reenable(cpu);
 		cpu->suspended = false;
+
+		hybrid_update_capacity(cpu);
 	}
 
 	return 0;
@@ -2733,13 +2963,11 @@ static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
 	return intel_cpufreq_cpu_offline(policy);
 }
 
-static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+static void intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 {
 	pr_debug("CPU %d exiting\n", policy->cpu);
 
 	policy->fast_switch_possible = false;
-
-	return 0;
 }
 
 static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -2758,9 +2986,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	/* cpuinfo and default policy values */
 	policy->cpuinfo.min_freq = cpu->pstate.min_freq;
-	update_turbo_state();
-	global.turbo_disabled_mf = global.turbo_disabled;
-	policy->cpuinfo.max_freq = global.turbo_disabled ?
+	policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 
 	policy->min = policy->cpuinfo.min_freq;
@@ -2925,8 +3151,6 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
 	struct cpufreq_freqs freqs;
 	int target_pstate;
 
-	update_turbo_state();
-
 	freqs.old = policy->cur;
 	freqs.new = target_freq;
 
@@ -2948,8 +3172,6 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 	int target_pstate;
 
-	update_turbo_state();
-
 	target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq);
 
 	target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
 
@@ -2967,9 +3189,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
 	int old_pstate = cpu->pstate.current_pstate;
 	int cap_pstate, min_pstate, max_pstate, target_pstate;
 
-	update_turbo_state();
-	cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) :
-					     HWP_HIGHEST_PERF(hwp_cap);
+	cap_pstate = READ_ONCE(global.no_turbo) ?
+					HWP_GUARANTEED_PERF(hwp_cap) :
+					HWP_HIGHEST_PERF(hwp_cap);
 
 	/* Optimization: Avoid unnecessary divisions. */
 
@@ -3076,7 +3298,7 @@ pstate_exit:
 	return ret;
 }
 
-static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+static void intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct freq_qos_request *req;
 
@@ -3086,7 +3308,7 @@ static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	freq_qos_remove_request(req);
 	kfree(req);
 
-	return intel_pstate_cpu_exit(policy);
+	intel_pstate_cpu_exit(policy);
 }
 
 static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
@@ -3137,10 +3359,8 @@ static void intel_pstate_driver_cleanup(void)
 			if (intel_pstate_driver == &intel_pstate)
 				intel_pstate_clear_update_util_hook(cpu);
 
-			spin_lock(&hwp_notify_lock);
 			kfree(all_cpu_data[cpu]);
 			WRITE_ONCE(all_cpu_data[cpu], NULL);
-			spin_unlock(&hwp_notify_lock);
 		}
 	}
 	cpus_read_unlock();
@@ -3150,6 +3370,7 @@ static void intel_pstate_driver_cleanup(void)
 
 static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 {
+	bool refresh_cpu_cap_scaling;
 	int ret;
 
 	if (driver == &intel_pstate)
@@ -3157,6 +3378,12 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 
 	memset(&global, 0, sizeof(global));
 	global.max_perf_pct = 100;
+	global.turbo_disabled = turbo_is_disabled();
+	global.no_turbo = global.turbo_disabled;
+
+	arch_set_max_freq_ratio(global.turbo_disabled);
+
+	refresh_cpu_cap_scaling = hybrid_clear_max_perf_cpu();
 
 	intel_pstate_driver = driver;
 	ret = cpufreq_register_driver(intel_pstate_driver);
@@ -3167,6 +3394,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 
 	global.min_perf_pct = min_perf_pct_min();
 
+	hybrid_init_cpu_capacity_scaling(refresh_cpu_cap_scaling);
+
 	return 0;
 }
 
@@ -3388,14 +3617,13 @@ static inline void intel_pstate_request_control_from_smm(void) {}
 
 #define INTEL_PSTATE_HWP_BROADWELL	0x01
 
-#define X86_MATCH_HWP(model, hwp_mode)					\
-	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
-					   X86_FEATURE_HWP, hwp_mode)
+#define X86_MATCH_HWP(vfm, hwp_mode)					\
+	X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_HWP, hwp_mode)
 
 static const struct x86_cpu_id hwp_support_ids[] __initconst = {
-	X86_MATCH_HWP(BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
-	X86_MATCH_HWP(BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL),
-	X86_MATCH_HWP(ANY, 0),
+	X86_MATCH_HWP(INTEL_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
+	X86_MATCH_HWP(INTEL_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL),
+	X86_MATCH_HWP(INTEL_ANY, 0),
 	{}
 };
 
@@ -3407,19 +3635,47 @@ static bool intel_pstate_hwp_is_enabled(void)
 
 	return !!(value & 0x1);
 }
 
-static const struct x86_cpu_id intel_epp_balance_perf[] = {
+#define POWERSAVE_MASK			GENMASK(7, 0)
+#define BALANCE_POWER_MASK		GENMASK(15, 8)
+#define BALANCE_PERFORMANCE_MASK	GENMASK(23, 16)
+#define PERFORMANCE_MASK		GENMASK(31, 24)
+
+#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \
+		(FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\
+		 FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\
+		 FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\
+		 FIELD_PREP_CONST(PERFORMANCE_MASK, performance))
+
+#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \
+		(HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\
+				    balance_perf, HWP_EPP_PERFORMANCE))
+
+static const struct x86_cpu_id intel_epp_default[] = {
 	/*
 	 * Set EPP value as 102, this is the max suggested EPP
 	 * which can result in one core turbo frequency for
 	 * AlderLake Mobile CPUs.
 	 */
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
-	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32),
+	X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)),
+	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+	X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+							     179, 64, 16)),
+	X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+							  179, 64, 16)),
 	{}
 };
 
 static const struct x86_cpu_id intel_hybrid_scaling_factor[] = {
-	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL),
+	X86_MATCH_VFM(INTEL_ALDERLAKE, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_ALDERLAKE_L, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL),
+	X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL),
 	{}
 };
 
@@ -3451,7 +3707,7 @@ static int __init intel_pstate_init(void)
 		 * deal with it.
 		 */
 		if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) {
-			WRITE_ONCE(hwp_active, 1);
+			hwp_active = true;
 			hwp_mode_bdw = id->driver_data;
 			intel_pstate.attr = hwp_cpufreq_attrs;
 			intel_cpufreq.attr = hwp_cpufreq_attrs;
@@ -3512,11 +3768,24 @@ hwp_cpu_matched:
 	intel_pstate_sysfs_expose_params();
 
 	if (hwp_active) {
-		const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf);
+		const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default);
 		const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor);
 
-		if (id)
-			epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data;
+		if (id) {
+			epp_values[EPP_INDEX_POWERSAVE] =
+				FIELD_GET(POWERSAVE_MASK, id->driver_data);
+			epp_values[EPP_INDEX_BALANCE_POWERSAVE] =
+				FIELD_GET(BALANCE_POWER_MASK, id->driver_data);
+			epp_values[EPP_INDEX_BALANCE_PERFORMANCE] =
+				FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data);
+			epp_values[EPP_INDEX_PERFORMANCE] =
+				FIELD_GET(PERFORMANCE_MASK, id->driver_data);
+			pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n",
+				 epp_values[EPP_INDEX_POWERSAVE],
+				 epp_values[EPP_INDEX_BALANCE_POWERSAVE],
+				 epp_values[EPP_INDEX_BALANCE_PERFORMANCE],
+				 epp_values[EPP_INDEX_PERFORMANCE]);
+		}
 
 		if (hybrid_id) {
 			hybrid_scaling_factor = hybrid_id->driver_data;