From 0f127178892ec54553da2e3975e23979709b5ff9 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Mon, 22 Jul 2024 10:14:32 -0700 Subject: cpufreq: powerpc: add missing MODULE_DESCRIPTION() macros With ARCH=powerpc, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cpufreq/ppc-cbe-cpufreq.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cpufreq/powernv-cpufreq.o Add the missing invocation of the MODULE_DESCRIPTION() macro to all files which have a MODULE_LICENSE(). This includes three additional files which, although they did not produce a warning with the powerpc allmodconfig configuration, may cause this warning with specific options enabled in the kernel configuration. Signed-off-by: Jeff Johnson Acked-by: Michael Ellerman (powerpc) Acked-by: Viresh Kumar Link: https://patch.msgid.link/20240722-md-powerpc-drivers-cpufreq-v2-1-bb84d715eb3d@quicinc.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/maple-cpufreq.c | 1 + drivers/cpufreq/pasemi-cpufreq.c | 1 + drivers/cpufreq/pmac64-cpufreq.c | 1 + drivers/cpufreq/powernv-cpufreq.c | 1 + drivers/cpufreq/ppc_cbe_cpufreq.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index f9306410a07f..690da85c4865 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -238,4 +238,5 @@ bail_noprops: module_init(maple_cpufreq_init); +MODULE_DESCRIPTION("cpufreq driver for Maple 970FX/970MP boards"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index ee925b53b6b9..5fc9cb480516 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -269,5 +269,6 @@ static void __exit pas_cpufreq_exit(void) module_init(pas_cpufreq_init); module_exit(pas_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for PA Semi PWRficient"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Egor Martovetsky , Olof Johansson "); diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 2cd2b06849a2..9d3fe36075f8 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -671,4 +671,5 @@ static int __init g5_cpufreq_init(void) module_init(g5_cpufreq_init); +MODULE_DESCRIPTION("cpufreq driver for SMU & 970FX based G5 Macs"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 50c62929f7ca..bc55723b4d87 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1160,5 +1160,6 @@ static void __exit powernv_cpufreq_exit(void) } module_exit(powernv_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for IBM/OpenPOWER powernv systems"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vaidyanathan Srinivasan "); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index 5ee4c7bfdcc5..98595b3ea13f 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -168,5 +168,6 @@ static void __exit cbe_cpufreq_exit(void) module_init(cbe_cpufreq_init); module_exit(cbe_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for Cell BE processors"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Krafft "); -- cgit From 166df51097a258a14fe9e946e2157f3b75eeb3c2 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Fri, 19 Jul 2024 10:12:35 +0000 Subject: powercap/intel_rapl: Add support for AMD family 1Ah AMD Family 1Ah's RAPL MSRs are identical to Family 19h's, extend Family 19h's support to Family 1Ah. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Link: https://patch.msgid.link/20240719101234.50827-1-Dhananjay.Ugwekar@amd.com Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 3cffa6c79538..8b7a5a31e8c1 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1285,6 +1285,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd), + X86_MATCH_VENDOR_FAM(AMD, 0x1A, &rapl_defaults_amd), X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd), {} }; -- cgit From 37c6dccd683797a5a4ec85d2e94939bf829d3499 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Sun, 28 Jul 2024 20:26:59 +0100 Subject: cpufreq: Remove LATENCY_MULTIPLIER The current LATENCY_MULTIPLIER which has been around for nearly 20 years causes rate_limit_us to be always in ms range. On M1 mac mini I get 50 and 56us transition latency, but due to the 1000 multiplier we end up setting rate_limit_us to 50 and 56ms, which gets capped into 2ms and was 10ms before e13aa799c2a6 ("cpufreq: Change default transition delay to 2ms") On Intel I5 system transition latency is 20us but due to the multiplier we end up with 20ms that again is capped to 2ms. Given how good modern hardware and how modern workloads require systems to be more responsive to cater for sudden changes in workload (tasks sleeping/wakeup/migrating, uclamp causing a sudden boost or cap) and that 2ms is quarter of the time of 120Hz refresh rate system, drop the old logic in favour of providing 50% headroom. rate_limit_us = 1.5 * latency. I considered not adding any headroom which could mean that we can end up with infinite back-to-back requests. I also considered providing a constant headroom (e.g: 100us) assuming that any h/w or f/w dealing with the request shouldn't require a large headroom when transition_latency is actually high. But for both cases I wasn't sure if h/w or f/w can end up being overwhelmed dealing with the freq requests in a potentially busy system. So I opted for providing 50% breathing room. This is expected to impact schedutil only as the other user, dbs_governor, takes the max(2*tick, transition_delay_us) and the former was at least 2ms on 1ms TICK, which is equivalent to the max_delay_us before applying this patch. For systems with TICK of 4ms, this value would have almost always ended up with 8ms sampling rate. For systems that report 0 transition latency, we still default to returning 1ms as transition delay. This helps in eliminating a source of latency for applying requests as mentioned in [1]. For example if we have a 1ms tick, most systems will miss sending an update at tick when updating the util_avg for a task/CPU (rate_limit_us will be 2ms for most systems). Link: https://lore.kernel.org/lkml/20240724212255.mfr2ybiv2j2uqek7@airbuntu/ # [1] Link: https://lore.kernel.org/lkml/20240205022500.2232124-1-qyousef@layalina.io/ Signed-off-by: Qais Yousef Link: https://patch.msgid.link/20240728192659.58115-1-qyousef@layalina.io [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 27 ++++----------------------- include/linux/cpufreq.h | 6 ------ 2 files changed, 4 insertions(+), 29 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 04fc786dd2c0..f98c9438760c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -575,30 +575,11 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) return policy->transition_delay_us; latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (latency) { - unsigned int max_delay_us = 2 * MSEC_PER_SEC; + if (latency) + /* Give a 50% breathing room between updates */ + return latency + (latency >> 1); - /* - * If the platform already has high transition_latency, use it - * as-is. - */ - if (latency > max_delay_us) - return latency; - - /* - * For platforms that can change the frequency very fast (< 2 - * us), the above formula gives a decent transition delay. But - * for platforms where transition_latency is in milliseconds, it - * ends up giving unrealistic values. - * - * Cap the default transition delay to 2 ms, which seems to be - * a reasonable amount of time after which we should reevaluate - * the frequency. - */ - return min(latency * LATENCY_MULTIPLIER, max_delay_us); - } - - return LATENCY_MULTIPLIER; + return USEC_PER_MSEC; } EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d4d2f4d1d7cb..e0e19d9c1323 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* - * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. - */ -#define LATENCY_MULTIPLIER (1000) - struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int (*init)(struct cpufreq_policy *policy); -- cgit From 6306653cd84f6faace2af646aedef2b90e61958a Mon Sep 17 00:00:00 2001 From: Xueqin Luo Date: Thu, 1 Aug 2024 16:31:55 +0800 Subject: PM: hibernate: Use sysfs_emit() and sysfs_emit_at() in "show" functions As Documentation/filesystems/sysfs.rst suggested, show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. No functional change intended. Signed-off-by: Xueqin Luo Link: https://patch.msgid.link/20240801083156.2513508-2-luoxueqin@kylinos.cn Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 0a213f69a9e4..e35829d36039 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1123,11 +1123,11 @@ static const char * const hibernation_modes[] = { static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { + ssize_t count = 0; int i; - char *start = buf; if (!hibernation_available()) - return sprintf(buf, "[disabled]\n"); + return sysfs_emit(buf, "[disabled]\n"); for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (!hibernation_modes[i]) @@ -1147,12 +1147,16 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, continue; } if (i == hibernation_mode) - buf += sprintf(buf, "[%s] ", hibernation_modes[i]); + count += sysfs_emit_at(buf, count, "[%s] ", hibernation_modes[i]); else - buf += sprintf(buf, "%s ", hibernation_modes[i]); + count += sysfs_emit_at(buf, count, "%s ", hibernation_modes[i]); } - buf += sprintf(buf, "\n"); - return buf-start; + + /* Convert the last space to a newline if needed. */ + if (count > 0) + buf[count - 1] = '\n'; + + return count; } static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1210,8 +1214,8 @@ power_attr(disk); static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d:%d\n", MAJOR(swsusp_resume_device), - MINOR(swsusp_resume_device)); + return sysfs_emit(buf, "%d:%d\n", MAJOR(swsusp_resume_device), + MINOR(swsusp_resume_device)); } static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1270,7 +1274,7 @@ power_attr(resume); static ssize_t resume_offset_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)swsusp_resume_block); + return sysfs_emit(buf, "%llu\n", (unsigned long long)swsusp_resume_block); } static ssize_t resume_offset_store(struct kobject *kobj, @@ -1293,7 +1297,7 @@ power_attr(resume_offset); static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%lu\n", image_size); + return sysfs_emit(buf, "%lu\n", image_size); } static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1314,7 +1318,7 @@ power_attr(image_size); static ssize_t reserved_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%lu\n", reserved_size); + return sysfs_emit(buf, "%lu\n", reserved_size); } static ssize_t reserved_size_store(struct kobject *kobj, -- cgit From a712f03fe8b162c922bc9eb91758254fcf9bcb08 Mon Sep 17 00:00:00 2001 From: Xueqin Luo Date: Thu, 1 Aug 2024 16:31:56 +0800 Subject: PM: sleep: Use sysfs_emit() and sysfs_emit_at() in "show" functions As Documentation/filesystems/sysfs.rst suggested, show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. No functional change intended. Signed-off-by: Xueqin Luo Link: https://patch.msgid.link/20240801083156.2513508-3-luoxueqin@kylinos.cn [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 76 +++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index a9e0693aaf69..6254814d4817 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -115,7 +115,7 @@ int pm_async_enabled = 1; static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pm_async_enabled); + return sysfs_emit(buf, "%d\n", pm_async_enabled); } static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -139,7 +139,7 @@ power_attr(pm_async); static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - char *s = buf; + ssize_t count = 0; suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { @@ -149,17 +149,17 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, const char *label = mem_sleep_states[i]; if (mem_sleep_current == i) - s += sprintf(s, "[%s] ", label); + count += sysfs_emit_at(buf, count, "[%s] ", label); else - s += sprintf(s, "%s ", label); + count += sysfs_emit_at(buf, count, "%s ", label); } } /* Convert the last space to a newline if needed. */ - if (s != buf) - *(s-1) = '\n'; + if (count > 0) + buf[count - 1] = '\n'; - return (s - buf); + return count; } static suspend_state_t decode_suspend_state(const char *buf, size_t n) @@ -220,7 +220,7 @@ bool sync_on_suspend_enabled = !IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC); static ssize_t sync_on_suspend_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", sync_on_suspend_enabled); + return sysfs_emit(buf, "%d\n", sync_on_suspend_enabled); } static ssize_t sync_on_suspend_store(struct kobject *kobj, @@ -257,22 +257,22 @@ static const char * const pm_tests[__TEST_AFTER_LAST] = { static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - char *s = buf; + ssize_t count = 0; int level; for (level = TEST_FIRST; level <= TEST_MAX; level++) if (pm_tests[level]) { if (level == pm_test_level) - s += sprintf(s, "[%s] ", pm_tests[level]); + count += sysfs_emit_at(buf, count, "[%s] ", pm_tests[level]); else - s += sprintf(s, "%s ", pm_tests[level]); + count += sysfs_emit_at(buf, count, "%s ", pm_tests[level]); } - if (s != buf) - /* convert the last space to a newline */ - *(s-1) = '\n'; + /* Convert the last space to a newline if needed. */ + if (count > 0) + buf[count - 1] = '\n'; - return (s - buf); + return count; } static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -390,7 +390,7 @@ static const char * const suspend_step_names[] = { static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ - return sprintf(buf, format_str, suspend_stats._name); \ + return sysfs_emit(buf, format_str, suspend_stats._name);\ } \ static struct kobj_attribute _name = __ATTR_RO(_name) @@ -404,7 +404,7 @@ suspend_attr(max_hw_sleep, "%llu\n"); static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", \ + return sysfs_emit(buf, "%u\n", \ suspend_stats.step_failures[step-1]); \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) @@ -428,7 +428,7 @@ static ssize_t last_failed_dev_show(struct kobject *kobj, index %= REC_FAILED_NUM; last_failed_dev = suspend_stats.failed_devs[index]; - return sprintf(buf, "%s\n", last_failed_dev); + return sysfs_emit(buf, "%s\n", last_failed_dev); } static struct kobj_attribute last_failed_dev = __ATTR_RO(last_failed_dev); @@ -442,7 +442,7 @@ static ssize_t last_failed_errno_show(struct kobject *kobj, index %= REC_FAILED_NUM; last_failed_errno = suspend_stats.errno[index]; - return sprintf(buf, "%d\n", last_failed_errno); + return sysfs_emit(buf, "%d\n", last_failed_errno); } static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno); @@ -456,7 +456,7 @@ static ssize_t last_failed_step_show(struct kobject *kobj, index %= REC_FAILED_NUM; step = suspend_stats.failed_steps[index]; - return sprintf(buf, "%s\n", suspend_step_names[step]); + return sysfs_emit(buf, "%s\n", suspend_step_names[step]); } static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step); @@ -571,7 +571,7 @@ bool pm_print_times_enabled; static ssize_t pm_print_times_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pm_print_times_enabled); + return sysfs_emit(buf, "%d\n", pm_print_times_enabled); } static ssize_t pm_print_times_store(struct kobject *kobj, @@ -604,7 +604,7 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj, if (!pm_wakeup_irq()) return -ENODATA; - return sprintf(buf, "%u\n", pm_wakeup_irq()); + return sysfs_emit(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); @@ -620,7 +620,7 @@ EXPORT_SYMBOL_GPL(pm_debug_messages_should_print); static ssize_t pm_debug_messages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pm_debug_messages_on); + return sysfs_emit(buf, "%d\n", pm_debug_messages_on); } static ssize_t pm_debug_messages_store(struct kobject *kobj, @@ -668,21 +668,23 @@ struct kobject *power_kobj; static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - char *s = buf; + ssize_t count = 0; #ifdef CONFIG_SUSPEND suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (pm_states[i]) - s += sprintf(s,"%s ", pm_states[i]); + count += sysfs_emit_at(buf, count, "%s ", pm_states[i]); #endif if (hibernation_available()) - s += sprintf(s, "disk "); - if (s != buf) - /* convert the last space to a newline */ - *(s-1) = '\n'; - return (s - buf); + count += sysfs_emit_at(buf, count, "disk "); + + /* Convert the last space to a newline if needed. */ + if (count > 0) + buf[count - 1] = '\n'; + + return count; } static suspend_state_t decode_state(const char *buf, size_t n) @@ -782,7 +784,7 @@ static ssize_t wakeup_count_show(struct kobject *kobj, unsigned int val; return pm_get_wakeup_count(&val, true) ? - sprintf(buf, "%u\n", val) : -EINTR; + sysfs_emit(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, @@ -824,17 +826,17 @@ static ssize_t autosleep_show(struct kobject *kobj, suspend_state_t state = pm_autosleep_state(); if (state == PM_SUSPEND_ON) - return sprintf(buf, "off\n"); + return sysfs_emit(buf, "off\n"); #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) - return sprintf(buf, "%s\n", pm_states[state] ? + return sysfs_emit(buf, "%s\n", pm_states[state] ? pm_states[state] : "error"); #endif #ifdef CONFIG_HIBERNATION - return sprintf(buf, "disk\n"); + return sysfs_emit(buf, "disk\n"); #else - return sprintf(buf, "error"); + return sysfs_emit(buf, "error\n"); #endif } @@ -903,7 +905,7 @@ int pm_trace_enabled; static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pm_trace_enabled); + return sysfs_emit(buf, "%d\n", pm_trace_enabled); } static ssize_t @@ -940,7 +942,7 @@ power_attr_ro(pm_trace_dev_match); static ssize_t pm_freeze_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%u\n", freeze_timeout_msecs); + return sysfs_emit(buf, "%u\n", freeze_timeout_msecs); } static ssize_t pm_freeze_timeout_store(struct kobject *kobj, -- cgit From 872cc94c7e3d874373f56b035f998631c6b26d22 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 13:12:40 -0600 Subject: cpufreq: Use of_property_present() Use of_property_present() to test for property presence rather than of_(find|get)_property(). This is part of a larger effort to remove callers of of_find_property() and similar functions. of_find_property() leaks the DT struct property and data pointers which is a problem for dynamically allocated nodes which may be freed. Signed-off-by: Rob Herring (Arm) Acked-by: Chen-Yu Tsai Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 11 +++-------- drivers/cpufreq/pmac64-cpufreq.c | 2 +- drivers/cpufreq/sti-cpufreq.c | 2 +- drivers/cpufreq/sun50i-cpufreq-nvmem.c | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 6532c4d71338..983443396f8f 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -69,7 +69,6 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) static const char *find_supply_name(struct device *dev) { struct device_node *np __free(device_node) = of_node_get(dev->of_node); - struct property *pp; int cpu = dev->id; /* This must be valid for sure */ @@ -77,14 +76,10 @@ static const char *find_supply_name(struct device *dev) return NULL; /* Try "cpu0" for older DTs */ - if (!cpu) { - pp = of_find_property(np, "cpu0-supply", NULL); - if (pp) - return "cpu0"; - } + if (!cpu && of_property_present(np, "cpu0-supply")) + return "cpu0"; - pp = of_find_property(np, "cpu-supply", NULL); - if (pp) + if (of_property_present(np, "cpu-supply")) return "cpu"; dev_dbg(dev, "no regulator for cpu%d\n", cpu); diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 2cd2b06849a2..c87cd6e0b638 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -505,7 +505,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) continue; if (strcmp(loc, "CPU CLOCK")) continue; - if (!of_get_property(hwclock, "platform-get-frequency", NULL)) + if (!of_property_present(hwclock, "platform-get-frequency")) continue; break; } diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 8e2e703c3865..b15b3142b5fe 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -267,7 +267,7 @@ static int __init sti_cpufreq_init(void) goto skip_voltage_scaling; } - if (!of_get_property(ddata.cpu->of_node, "operating-points-v2", NULL)) { + if (!of_property_present(ddata.cpu->of_node, "operating-points-v2")) { dev_err(ddata.cpu, "OPP-v2 not supported\n"); goto skip_voltage_scaling; } diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 95ac8d46c156..293921acec93 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -146,7 +146,7 @@ static bool dt_has_supported_hw(void) return false; for_each_child_of_node_scoped(np, opp) { - if (of_find_property(opp, "opp-supported-hw", NULL)) { + if (of_property_present(opp, "opp-supported-hw")) { has_opp_supported_hw = true; break; } -- cgit From 9b3cc56c24d80671763b4b6b7ea4aed213059d32 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 14:16:15 -0600 Subject: cpufreq: spear: Use of_property_for_each_u32() instead of open coding Use of_property_count_u32_elems() and of_property_for_each_u32() instead of of_find_property() and open coding the property parsing. This is part of a larger effort to remove callers of of_find_property() and similar functions. of_find_property() leaks the DT struct property and data pointers which is a problem for dynamically allocated nodes which may be freed. Signed-off-by: Rob Herring (Arm) [Viresh: Fixed build failure and initialize / increment 'i'] Signed-off-by: Viresh Kumar --- drivers/cpufreq/spear-cpufreq.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 78b875db6b66..d8ab5b01d46d 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -171,10 +171,9 @@ static struct cpufreq_driver spear_cpufreq_driver = { static int spear_cpufreq_probe(struct platform_device *pdev) { struct device_node *np; - const struct property *prop; struct cpufreq_frequency_table *freq_tbl; - const __be32 *val; - int cnt, i, ret; + u32 val; + int cnt, ret, i = 0; np = of_cpu_device_node_get(0); if (!np) { @@ -186,26 +185,23 @@ static int spear_cpufreq_probe(struct platform_device *pdev) &spear_cpufreq.transition_latency)) spear_cpufreq.transition_latency = CPUFREQ_ETERNAL; - prop = of_find_property(np, "cpufreq_tbl", NULL); - if (!prop || !prop->value) { + cnt = of_property_count_u32_elems(np, "cpufreq_tbl"); + if (cnt <= 0) { pr_err("Invalid cpufreq_tbl\n"); ret = -ENODEV; goto out_put_node; } - cnt = prop->length / sizeof(u32); - val = prop->value; - freq_tbl = kcalloc(cnt + 1, sizeof(*freq_tbl), GFP_KERNEL); if (!freq_tbl) { ret = -ENOMEM; goto out_put_node; } - for (i = 0; i < cnt; i++) - freq_tbl[i].frequency = be32_to_cpup(val++); + of_property_for_each_u32(np, "cpufreq_tbl", val) + freq_tbl[i++].frequency = val; - freq_tbl[i].frequency = CPUFREQ_TABLE_END; + freq_tbl[cnt].frequency = CPUFREQ_TABLE_END; spear_cpufreq.freq_tbl = freq_tbl; -- cgit From 5c326d24c1b31a275afc05b4fbb7292eabd4d70a Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 Aug 2024 07:58:22 -0600 Subject: cpufreq: qcom: Add explicit io.h include for readl/writel_relaxed The qcom-cpufreq-hw driver is relying on an implicit include of io.h which doesn't get included on some architectures. Signed-off-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 370fe6a0104b..900d6844c43d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include -- cgit From b81f97031a0ea2823cf37bb37eac51dbb8350e02 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 Aug 2024 07:58:23 -0600 Subject: cpufreq: omap: Drop asm includes The omap driver doesn't actually need asm/smp_plat.h, so drop it. asm/cpu.h is not needed either as linux/cpu.h is already included. Signed-off-by: Rob Herring (Arm) Acked-by: Kevin Hilman Signed-off-by: Viresh Kumar --- drivers/cpufreq/omap-cpufreq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 3458d5cc9b7f..de8be0a8932d 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -28,9 +28,6 @@ #include #include -#include -#include - /* OPP tolerance in percentage */ #define OPP_TOLERANCE 4 -- cgit From 1ffec650d07f63a584d2a789a20bb64e9b13d34b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 Aug 2024 07:58:24 -0600 Subject: cpufreq: armada-8k: Avoid excessive stack usage In some build configurations (e.g. x86 allmodconfig), 2 cpu_mask variables exceeds the max stack frame size: drivers/cpufreq/armada-8k-cpufreq.c:203:1: error: the frame size of 2128 bytes is larger than 2048 bytes Fix it by making "cpus" static which is fine given that module init is only called once. Signed-off-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-8k-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index ce5a5641b6dd..7a979db81f09 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -132,7 +132,7 @@ static int __init armada_8k_cpufreq_init(void) int ret = 0, opps_index = 0, cpu, nb_cpus; struct freq_table *freq_tables; struct device_node *node; - struct cpumask cpus; + static struct cpumask cpus; node = of_find_matching_node_and_match(NULL, armada_8k_cpufreq_of_match, NULL); -- cgit From 76fb981ad6774b82f06703c896b492c8659b543b Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Fri, 9 Aug 2024 14:07:28 +0530 Subject: tools/cpupower: display residency value in idle-info Update cpuidle tool to display the residency value of cpuidle states. This addition provides a clearer and more detailed view of idle state information when using cpuidle-info. -------------------------------- Before Patch: -------------------------------- $ cpupower idle-info CPUidle driver: intel_idle CPUidle governor: menu analyzing CPU 28: Number of idle states: 3 Available idle states: POLL C1 C1E POLL: Flags/Description: CPUIDLE CORE POLL IDLE Latency: 0 Usage: 7448 Duration: 207170 C1: Flags/Description: MWAIT 0x00 Latency: 2 Usage: 7023 Duration: 3736853 C1E: Flags/Description: MWAIT 0x01 Latency: 10 Usage: 18468 Duration: 11396212 -------------------------------- After Patch: -------------------------------- $ cpupower idle-info CPUidle driver: intel_idle CPUidle governor: menu analyzing CPU 12: Number of idle states: 3 Available idle states: POLL C1 C1E POLL: Flags/Description: CPUIDLE CORE POLL IDLE Latency: 0 Residency: 0 Usage: 1950 Duration: 38458 C1: Flags/Description: MWAIT 0x00 Latency: 2 Residency: 2 Usage: 10688 Duration: 7133020 C1E: Flags/Description: MWAIT 0x01 Latency: 10 Residency: 20 Usage: 22356 Duration: 15687259 -------------------------------- Signed-off-by: Aboorva Devarajan Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpuidle.c | 8 ++++++++ tools/power/cpupower/lib/cpuidle.h | 2 ++ tools/power/cpupower/utils/cpuidle-info.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 479c5971aa6d..0ecac009273c 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -116,6 +116,7 @@ enum idlestate_value { IDLESTATE_USAGE, IDLESTATE_POWER, IDLESTATE_LATENCY, + IDLESTATE_RESIDENCY, IDLESTATE_TIME, IDLESTATE_DISABLE, MAX_IDLESTATE_VALUE_FILES @@ -125,6 +126,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = { [IDLESTATE_USAGE] = "usage", [IDLESTATE_POWER] = "power", [IDLESTATE_LATENCY] = "latency", + [IDLESTATE_RESIDENCY] = "residency", [IDLESTATE_TIME] = "time", [IDLESTATE_DISABLE] = "disable", }; @@ -254,6 +256,12 @@ unsigned long cpuidle_state_latency(unsigned int cpu, return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY); } +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate) +{ + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_RESIDENCY); +} + unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate) { diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h index 2e10fead2e1e..2ab404d40259 100644 --- a/tools/power/cpupower/lib/cpuidle.h +++ b/tools/power/cpupower/lib/cpuidle.h @@ -8,6 +8,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, unsigned int disable); unsigned long cpuidle_state_latency(unsigned int cpu, unsigned int idlestate); +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate); unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate); unsigned long long cpuidle_state_time(unsigned int cpu, diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 44126a87fa7a..e0d17f0de3fe 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -64,6 +64,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) printf(_("Latency: %lu\n"), cpuidle_state_latency(cpu, idlestate)); + printf(_("Residency: %lu\n"), + cpuidle_state_residency(cpu, idlestate)); printf(_("Usage: %lu\n"), cpuidle_state_usage(cpu, idlestate)); printf(_("Duration: %llu\n"), @@ -115,6 +117,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) printf(_("promotion[--] demotion[--] ")); printf(_("latency[%03lu] "), cpuidle_state_latency(cpu, cstate)); + printf(_("residency[%05lu] "), + cpuidle_state_residency(cpu, cstate)); printf(_("usage[%08lu] "), cpuidle_state_usage(cpu, cstate)); printf(_("duration[%020Lu] \n"), -- cgit From 26096aed255fbac9501718174dbb24c935d8854e Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Tue, 30 Jul 2024 04:49:19 +0000 Subject: powercap/intel_rapl: Fix the energy-pkg event for AMD CPUs After commit ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf"), on AMD processors that support extended CPUID leaf 0x80000026, the topology_logical_die_id() macros, no longer returns package id, instead it returns the CCD (Core Complex Die) id. This leads to the energy-pkg event scope to be modified to CCD instead of package. For more historical context, please refer to commit 32fb480e0a2c ("powercap/intel_rapl: Support multi-die/package"), which initially changed the RAPL scope from package to die for all systems, as Intel systems with Die enumeration have RAPL scope as die, and those without die enumeration are not affected. So, all systems(Intel, AMD, Hygon), worked correctly with topology_logical_die_id() until recently, but this changed after the "0x80000026 leaf" commit mentioned above. Future multi-die Intel systems will have package scope RAPL counters, but they will be using TPMI RAPL interface, which is not affected by this change. Replacing topology_logical_die_id() with topology_physical_package_id() conditionally only for AMD and Hygon fixes the energy-pkg event. On an AMD 2 socket 8 CCD Zen4 server: Before: linux$ ls /sys/class/powercap/ intel-rapl intel-rapl:4 intel-rapl:8:0 intel-rapl:d intel-rapl:0 intel-rapl:4:0 intel-rapl:9 intel-rapl:d:0 intel-rapl:0:0 intel-rapl:5 intel-rapl:9:0 intel-rapl:e intel-rapl:1 intel-rapl:5:0 intel-rapl:a intel-rapl:e:0 intel-rapl:1:0 intel-rapl:6 intel-rapl:a:0 intel-rapl:f intel-rapl:2 intel-rapl:6:0 intel-rapl:b intel-rapl:f:0 intel-rapl:2:0 intel-rapl:7 intel-rapl:b:0 intel-rapl:3 intel-rapl:7:0 intel-rapl:c intel-rapl:3:0 intel-rapl:8 intel-rapl:c:0 After: linux$ ls /sys/class/powercap/ intel-rapl intel-rapl:0 intel-rapl:0:0 intel-rapl:1 intel-rapl:1:0 Only one sysfs entry per-event per-package is created after this change. Fixes: 63edbaa48a57 ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf") Reported-by: Michael Larabel Signed-off-by: Dhananjay Ugwekar Reviewed-by: Zhang Rui Link: https://patch.msgid.link/20240730044917.4680-3-Dhananjay.Ugwekar@amd.com Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 8b7a5a31e8c1..5a6d05bf847c 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -2129,6 +2129,21 @@ void rapl_remove_package(struct rapl_package *rp) } EXPORT_SYMBOL_GPL(rapl_remove_package); +/* + * RAPL Package energy counter scope: + * 1. AMD/HYGON platforms use per-PKG package energy counter + * 2. For Intel platforms + * 2.1 CLX-AP platform has per-DIE package energy counter + * 2.2 Other platforms that uses MSR RAPL are single die systems so the + * package energy counter can be considered as per-PKG/per-DIE, + * here it is considered as per-DIE. + * 2.3 New platforms that use TPMI RAPL doesn't care about the + * scope because they are not MSR/CPU based. + */ +#define rapl_msrs_are_pkg_scope() \ + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + /* caller to ensure CPU hotplug lock is held */ struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu) @@ -2136,8 +2151,14 @@ struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_ struct rapl_package *rp; int uid; - if (id_is_cpu) - uid = topology_logical_die_id(id); + if (id_is_cpu) { + uid = rapl_msrs_are_pkg_scope() ? + topology_physical_package_id(id) : topology_logical_die_id(id); + if (uid < 0) { + pr_err("topology_logical_(package/die)_id() returned a negative value"); + return ERR_PTR(-EINVAL); + } + } else uid = id; @@ -2169,9 +2190,14 @@ struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *pr return ERR_PTR(-ENOMEM); if (id_is_cpu) { - rp->id = topology_logical_die_id(id); + rp->id = rapl_msrs_are_pkg_scope() ? + topology_physical_package_id(id) : topology_logical_die_id(id); + if ((int)(rp->id) < 0) { + pr_err("topology_logical_(package/die)_id() returned a negative value"); + return ERR_PTR(-EINVAL); + } rp->lead_cpu = id; - if (topology_max_dies_per_package() > 1) + if (!rapl_msrs_are_pkg_scope() && topology_max_dies_per_package() > 1) snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", topology_physical_package_id(id), topology_die_id(id)); else -- cgit From eca0f1b0bbf9f1082993206e3430d9da391be366 Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Fri, 16 Aug 2024 17:03:32 +0530 Subject: powercap: intel_rapl: Add support for ArrowLake-U platform Add support for ArrowLake-U platform to the RAPL common driver. Signed-off-by: Sumeet Pawnikar Acked-by: Zhang Rui Link: https://patch.msgid.link/20240816113332.7408-1-sumeet.r.pawnikar@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 5a6d05bf847c..d110c5da5cfe 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1267,6 +1267,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core), X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core), X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core), X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt), -- cgit From 3ca2a3d1e7271549985dc57c8106bb07b3134ecb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 2 Aug 2024 11:48:39 -0700 Subject: cpufreq: intel_pstate: Support Granite Rapids and Sierra Forest OOB mode Prevent intel_pstate from loading when OOB (Out Of Band) P-states mode is enabled. The OOB identifying bits are same as for the prior generation CPUs like Emerald Rapids servers. Add Granite Rapids and Sierra Forest CPU models to intel_pstate_cpu_oob_ids[]. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240802184839.1909091-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c0278d023cfc..c69b420d0bc9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2425,6 +2425,10 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(INTEL_ICELAKE_X, core_funcs), X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs), X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs), + X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs), + X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs), + X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs), + X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs), {} }; #endif -- cgit From 370406bf5738dade8ac95a2ee95c29299d4ac902 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 6 Aug 2024 19:03:10 +0300 Subject: intel_idle: add Granite Rapids Xeon support Add Granite Rapids Xeon C-states, which are C1, C1E, C6, and C6P. Comparing to previous Xeon Generations (e.g., Emerald Rapids), C6 requests end up only in core C6 state, and no package C-state promotion takes place even if all cores in the package are in core C6. C6P requests also end up in core C6, but if all cores have requested C6P, the SoC will enter the package C6 state. Signed-off-by: Artem Bityutskiy Link: https://patch.msgid.link/20240806160310.3719205-1-artem.bityutskiy@linux.intel.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 9aab7abc2ae9..e20f57ac307e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1022,6 +1022,45 @@ static struct cpuidle_state spr_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state gnr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 4, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | + CPUIDLE_FLAG_INIT_XSTATE, + .exit_latency = 170, + .target_residency = 650, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6P", + .desc = "MWAIT 0x21", + .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED | + CPUIDLE_FLAG_INIT_XSTATE, + .exit_latency = 210, + .target_residency = 1000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state atom_cstates[] __initdata = { { .name = "C1E", @@ -1453,6 +1492,12 @@ static const struct idle_cpu idle_cpu_spr __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_gnr __initconst = { + .state_table = gnr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, @@ -1533,6 +1578,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &idle_cpu_gnr), X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &idle_cpu_bxt), -- cgit From 5bb33212b5c664396e5de4cd5a2999abb84a3978 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 20 Aug 2024 12:11:28 +0800 Subject: intel_idle: Disable promotion to C1E on Jasper Lake and Elkhart Lake PCIe ethernet throughut is sub-optimal on Jasper Lake and Elkhart Lake. The CPU can take long time to exit to C0 to handle IRQ and perform DMA when C1E has been entered. For this reason, adjust intel_idle to disable promotion to C1E and still use C-states from ACPI _CST on those two platforms. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219023 Signed-off-by: Kai-Heng Feng Link: https://patch.msgid.link/20240820041128.102452-1-kai.heng.feng@canonical.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e20f57ac307e..9457e34b9e32 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1520,6 +1520,10 @@ static const struct idle_cpu idle_cpu_dnv __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_tmt __initconst = { + .disable_promotion_to_c1e = true, +}; + static const struct idle_cpu idle_cpu_snr __initconst = { .state_table = snr_cstates, .disable_promotion_to_c1e = true, @@ -1584,6 +1588,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &idle_cpu_bxt), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &idle_cpu_dnv), + X86_MATCH_VFM(INTEL_ATOM_TREMONT, &idle_cpu_tmt), + X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &idle_cpu_tmt), X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &idle_cpu_snr), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &idle_cpu_grr), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &idle_cpu_srf), @@ -2121,7 +2127,7 @@ static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) drv->state_count = 1; - if (icpu) + if (icpu && icpu->state_table) intel_idle_init_cstates_icpu(drv); else intel_idle_init_cstates_acpi(drv); @@ -2255,7 +2261,11 @@ static int __init intel_idle_init(void) icpu = (const struct idle_cpu *)id->driver_data; if (icpu) { - cpuidle_state_table = icpu->state_table; + if (icpu->state_table) + cpuidle_state_table = icpu->state_table; + else if (!intel_idle_acpi_cst_extract()) + return -ENODEV; + auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; if (icpu->disable_promotion_to_c1e) c1e_promotion = C1E_PROMOTION_DISABLE; -- cgit From a309320ddbac6b1583224fcb6bacd424bcf8637f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 Aug 2024 11:40:22 +0200 Subject: cpuidle: riscv-sbi: Use scoped device node handling to fix missing of_node_put Two return statements in sbi_cpuidle_dt_init_states() did not drop the OF node reference count. Solve the issue and simplify entire error handling with scoped/cleanup.h. Fixes: 6abf32f1d9c5 ("cpuidle: Add RISC-V SBI CPU idle driver") Cc: All applicable Signed-off-by: Krzysztof Kozlowski Reviewed-by: Anup Patel Link: https://patch.msgid.link/20240820094023.61155-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-riscv-sbi.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index a6e123dfe394..5bb3401220d2 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "cpuidle-riscv-sbi: " fmt +#include #include #include #include @@ -236,19 +237,16 @@ static int sbi_cpuidle_dt_init_states(struct device *dev, { struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu); struct device_node *state_node; - struct device_node *cpu_node; u32 *states; int i, ret; - cpu_node = of_cpu_device_node_get(cpu); + struct device_node *cpu_node __free(device_node) = of_cpu_device_node_get(cpu); if (!cpu_node) return -ENODEV; states = devm_kcalloc(dev, state_count, sizeof(*states), GFP_KERNEL); - if (!states) { - ret = -ENOMEM; - goto fail; - } + if (!states) + return -ENOMEM; /* Parse SBI specific details from state DT nodes */ for (i = 1; i < state_count; i++) { @@ -264,10 +262,8 @@ static int sbi_cpuidle_dt_init_states(struct device *dev, pr_debug("sbi-state %#x index %d\n", states[i], i); } - if (i != state_count) { - ret = -ENODEV; - goto fail; - } + if (i != state_count) + return -ENODEV; /* Initialize optional data, used for the hierarchical topology. */ ret = sbi_dt_cpu_init_topology(drv, data, state_count, cpu); @@ -277,10 +273,7 @@ static int sbi_cpuidle_dt_init_states(struct device *dev, /* Store states in the per-cpu struct. */ data->states = states; -fail: - of_node_put(cpu_node); - - return ret; + return 0; } static void sbi_cpuidle_deinit_cpu(int cpu) -- cgit From d5c667e049ee75e82f3233eda0d44e39e8d492cf Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 Aug 2024 11:40:23 +0200 Subject: cpuidle: riscv-sbi: Simplify with scoped for each OF child loop Use scoped for_each_child_of_node_scoped() when iterating over device nodes to make code a bit simpler. Reviewed-by: Jonathan Cameron Signed-off-by: Krzysztof Kozlowski Reviewed-by: Anup Patel Link: https://patch.msgid.link/20240820094023.61155-2-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-riscv-sbi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index 5bb3401220d2..d228b4d18d56 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -448,7 +448,6 @@ static void sbi_pd_remove(void) static int sbi_genpd_probe(struct device_node *np) { - struct device_node *node; int ret = 0, pd_count = 0; if (!np) @@ -458,13 +457,13 @@ static int sbi_genpd_probe(struct device_node *np) * Parse child nodes for the "#power-domain-cells" property and * initialize a genpd/genpd-of-provider pair when it's found. */ - for_each_child_of_node(np, node) { + for_each_child_of_node_scoped(np, node) { if (!of_property_present(node, "#power-domain-cells")) continue; ret = sbi_pd_init(node); if (ret) - goto put_node; + goto remove_pd; pd_count++; } @@ -480,8 +479,6 @@ static int sbi_genpd_probe(struct device_node *np) return 0; -put_node: - of_node_put(node); remove_pd: sbi_pd_remove(); pr_err("failed to create CPU PM domains ret=%d\n", ret); -- cgit From 95f6580352a7225e619551febb83595bcb77ab17 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2024 11:41:34 +0300 Subject: powercap: intel_rapl: Fix off by one in get_rpi() The rp->priv->rpi array is either rpi_msr or rpi_tpmi which have NR_RAPL_PRIMITIVES number of elements. Thus the > needs to be >= to prevent an off by one access. Fixes: 98ff639a7289 ("powercap: intel_rapl: Support per Interface primitive information") Signed-off-by: Dan Carpenter Acked-by: Zhang Rui Link: https://patch.msgid.link/86e3a059-504d-4795-a5ea-4a653f3b41f8@stanley.mountain Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index d110c5da5cfe..a63a4f04063e 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -740,7 +740,7 @@ static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim) { struct rapl_primitive_info *rpi = rp->priv->rpi; - if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi) + if (prim < 0 || prim >= NR_RAPL_PRIMITIVES || !rpi) return NULL; return &rpi[prim]; -- cgit From 6baacf9391c03f91a7644b3f94634b7d4b2c5e34 Mon Sep 17 00:00:00 2001 From: Dhruva Gole Date: Wed, 21 Aug 2024 17:12:50 +0530 Subject: cpuidle: remove dead code from cpuidle_enter_state() Checking for index < 0 is useless because the find_deepest_state() function never really returns a negative value. Since this hasn't been reported in over 9 years it's dead code, so remove it. Signed-off-by: Dhruva Gole Link: https://patch.msgid.link/20240821114250.1416421-1-d-gole@ti.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 02e40fd7d948..9e418aec1755 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -228,10 +228,7 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev, if (broadcast && tick_broadcast_enter()) { index = find_deepest_state(drv, dev, target_state->exit_latency_ns, CPUIDLE_FLAG_TIMER_STOP, false); - if (index < 0) { - default_idle_call(); - return -EBUSY; - } + target_state = &drv->states[index]; broadcast = false; } -- cgit From 6b08b4ee5e60d8789aeb87250f718ef14bebb90e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 Aug 2024 15:51:36 +0300 Subject: powercap: intel_rapl: Change an error pointer to NULL The rapl_find_package_domain_cpuslocked() function is supposed to return NULL on error. This new error patch returns ERR_PTR(-EINVAL) but none of the callers check for that so it would lead to an Oops. Fixes: 26096aed255f ("powercap/intel_rapl: Fix the energy-pkg event for AMD CPUs") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/fa719c6a-8d3b-4cca-9b43-bcd477ff6655@stanley.mountain Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index a63a4f04063e..5e793b80fd6b 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -2157,7 +2157,7 @@ struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_ topology_physical_package_id(id) : topology_logical_die_id(id); if (uid < 0) { pr_err("topology_logical_(package/die)_id() returned a negative value"); - return ERR_PTR(-EINVAL); + return NULL; } } else -- cgit From 5a9d10145a54f7a3fb6297c0082bf030e04db3bc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Aug 2024 13:47:25 +0200 Subject: x86/sched: Add basic support for CPU capacity scaling In order be able to compute the sizes of tasks consistently across all CPUs in a hybrid system, it is necessary to provide CPU capacity scaling information to the scheduler via arch_scale_cpu_capacity(). Moreover, the value returned by arch_scale_freq_capacity() for the given CPU must correspond to the arch_scale_cpu_capacity() return value for it, or utilization computations will be inaccurate. Add support for it through per-CPU variables holding the capacity and maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE) that will be returned by arch_scale_cpu_capacity() and used by scale_freq_tick() to compute arch_freq_scale for the current CPU, respectively. In order to avoid adding measurable overhead for non-hybrid x86 systems, which are the vast majority in the field, whether or not the new hybrid CPU capacity scaling will be in effect is controlled by a static key. This static key is set by calling arch_enable_hybrid_capacity_scale() which also allocates memory for the per-CPU data and initializes it. Next, arch_set_cpu_capacity() is used to set the per-CPU variables mentioned above for each CPU and arch_rebuild_sched_domains() needs to be called for the scheduler to realize that capacity-aware scheduling can be used going forward. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Ricardo Neri Tested-by: Ricardo Neri # scale invariance Link: https://patch.msgid.link/10523497.nUPlyArG6x@rjwysocki.net [ rjw: Added parens to function kerneldoc comments ] Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/topology.h | 13 ++++++ arch/x86/kernel/cpu/aperfmperf.c | 89 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index abe3a8f22cbd..aef70336d624 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -282,9 +282,22 @@ static inline long arch_scale_freq_capacity(int cpu) } #define arch_scale_freq_capacity arch_scale_freq_capacity +bool arch_enable_hybrid_capacity_scale(void); +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, + unsigned long cap_freq, unsigned long base_freq); + +unsigned long arch_scale_cpu_capacity(int cpu); +#define arch_scale_cpu_capacity arch_scale_cpu_capacity + extern void arch_set_max_freq_ratio(bool turbo_disabled); extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); #else +static inline bool arch_enable_hybrid_capacity_scale(void) { return false; } +static inline void arch_set_cpu_capacity(int cpu, unsigned long cap, + unsigned long max_cap, + unsigned long cap_freq, + unsigned long base_freq) { } + static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } #endif diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 0b69bfbf345d..f642de2ebdac 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invariance_work, DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale); +static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key); + +struct arch_hybrid_cpu_scale { + unsigned long capacity; + unsigned long freq_ratio; +}; + +static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale; + +/** + * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling + * + * Allocate memory for per-CPU data used by hybrid CPU capacity scaling, + * initialize it and set the static key controlling its code paths. + * + * Must be called before arch_set_cpu_capacity(). + */ +bool arch_enable_hybrid_capacity_scale(void) +{ + int cpu; + + if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) { + WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled"); + return true; + } + + arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale); + if (!arch_cpu_scale) + return false; + + for_each_possible_cpu(cpu) { + per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE; + per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio; + } + + static_branch_enable(&arch_hybrid_cap_scale_key); + + pr_info("Hybrid CPU capacity scaling enabled\n"); + + return true; +} + +/** + * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU + * @cpu: Target CPU. + * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap. + * @max_cap: System-wide maximum CPU capacity. + * @cap_freq: Frequency of @cpu corresponding to @cap. + * @base_freq: Frequency of @cpu at which MPERF counts. + * + * The units in which @cap and @max_cap are expressed do not matter, so long + * as they are consistent, because the former is effectively divided by the + * latter. Analogously for @cap_freq and @base_freq. + * + * After calling this function for all CPUs, call arch_rebuild_sched_domains() + * to let the scheduler know that capacity-aware scheduling can be used going + * forward. + */ +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, + unsigned long cap_freq, unsigned long base_freq) +{ + if (static_branch_likely(&arch_hybrid_cap_scale_key)) { + WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity, + div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap)); + WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio, + div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq)); + } else { + WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled"); + } +} + +unsigned long arch_scale_cpu_capacity(int cpu) +{ + if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) + return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity); + + return SCHED_CAPACITY_SCALE; +} +EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity); + static void scale_freq_tick(u64 acnt, u64 mcnt) { - u64 freq_scale; + u64 freq_scale, freq_ratio; if (!arch_scale_freq_invariant()) return; @@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt) if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; - if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) + if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) + freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio); + else + freq_ratio = arch_max_freq_ratio; + + if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt) goto error; freq_scale = div64_u64(acnt, mcnt); -- cgit From 929ebc93ccaa8d183a2ba9f1cf769d1bfb847cca Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Sep 2024 13:44:54 +0200 Subject: cpufreq: intel_pstate: Set asymmetric CPU capacity on hybrid systems Make intel_pstate use the HWP_HIGHEST_PERF values from MSR_HWP_CAPABILITIES to set asymmetric CPU capacity information via the previously introduced arch_set_cpu_capacity() on hybrid systems without SMT. Setting asymmetric CPU capacity is generally necessary to allow the scheduler to compute task sizes in a consistent way across all CPUs in a system where they differ by capacity. That, in turn, should help to improve scheduling decisions. It is also necessary for the schedutil cpufreq governor to operate as expected on hybrid systems where tasks migrate between CPUs of different capacities. The underlying observation is that intel_pstate already uses MSR_HWP_CAPABILITIES to get CPU performance information which is exposed by it via sysfs and CPU performance scaling is based on it. Thus using this information for setting asymmetric CPU capacity is consistent with what the driver has been doing already. Moreover, HWP_HIGHEST_PERF reflects the maximum capacity of a given CPU including both the instructions-per-cycle (IPC) factor and the maximum turbo frequency and the units in which that value is expressed are the same for all CPUs in the system, so the maximum capacity ratio between two CPUs can be obtained by computing the ratio of their HWP_HIGHEST_PERF values. Of course, in principle that capacity ratio need not be directly applicable at lower frequencies, so using it for providing the asymmetric CPU capacity information to the scheduler is a rough approximation, but it is as good as it gets. Also, measurements indicate that this approximation is not too bad in practice. If the given system is hybrid and non-SMT, the new code disables ITMT support in the scheduler (because it may get in the way of asymmetric CPU capacity code in the scheduler that automatically gets enabled by setting asymmetric CPU capacity) after initializing all online CPUs and finds the one with the maximum HWP_HIGHEST_PERF value. Next, it computes the capacity number for each (online) CPU by dividing the product of its HWP_HIGHEST_PERF and SCHED_CAPACITY_SCALE by the maximum HWP_HIGHEST_PERF. When a CPU goes offline, its capacity is reset to SCHED_CAPACITY_SCALE and if it is the one with the maximum HWP_HIGHEST_PERF value, the capacity numbers for all of the other online CPUs are recomputed. This also takes care of a cleanup during driver operation mode changes. Analogously, when a new CPU goes online, its capacity number is updated and if its HWP_HIGHEST_PERF value is greater than the current maximum one, the capacity numbers for all of the other online CPUs are recomputed. The case when the driver is notified of a CPU capacity change, either through the HWP interrupt or through an ACPI notification, is handled similarly to the CPU online case above, except that if the target CPU is the current highest-capacity one and its capacity is reduced, the capacity numbers for all of the other online CPUs need to be recomputed either. If the driver's "no_trubo" sysfs attribute is updated, all of the CPU capacity information is computed from scratch to reflect the new turbo status. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ricardo Neri Tested-by: Ricardo Neri # scale invariance Link: https://patch.msgid.link/1979653.PYKUYFuaPT@rjwysocki.net [ rjw: Fixed a typo in the changelog ] [ rjw: Renamed 3 new functions and added a comment ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 236 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 232 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c69b420d0bc9..aaea9a39eced 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -215,6 +216,7 @@ struct global_params { * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set + * @capacity_perf: Highest perf used for scale invariance * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. @@ -253,6 +255,7 @@ struct cpudata { u64 hwp_req_cached; u64 hwp_cap_cached; u64 last_io_update; + unsigned int capacity_perf; unsigned int sched_flags; u32 hwp_boost_min; bool suspended; @@ -295,6 +298,7 @@ static int hwp_mode_bdw __ro_after_init; static bool per_cpu_limits __ro_after_init; static bool hwp_forced __ro_after_init; static bool hwp_boost __read_mostly; +static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -934,6 +938,139 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; +static struct cpudata *hybrid_max_perf_cpu __read_mostly; +/* + * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata, + * and the x86 arch scale-invariance information from concurrent updates. + */ +static DEFINE_MUTEX(hybrid_capacity_lock); + +static void hybrid_set_cpu_capacity(struct cpudata *cpu) +{ + arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf, + hybrid_max_perf_cpu->capacity_perf, + cpu->capacity_perf, + cpu->pstate.max_pstate_physical); + + pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu, + cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, + cpu->pstate.max_pstate_physical); +} + +static void hybrid_clear_cpu_capacity(unsigned int cpunum) +{ + arch_set_cpu_capacity(cpunum, 1, 1, 1, 1); +} + +static void hybrid_get_capacity_perf(struct cpudata *cpu) +{ + if (READ_ONCE(global.no_turbo)) { + cpu->capacity_perf = cpu->pstate.max_pstate_physical; + return; + } + + cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); +} + +static void hybrid_set_capacity_of_cpus(void) +{ + int cpunum; + + for_each_online_cpu(cpunum) { + struct cpudata *cpu = all_cpu_data[cpunum]; + + if (cpu) + hybrid_set_cpu_capacity(cpu); + } +} + +static void hybrid_update_cpu_capacity_scaling(void) +{ + struct cpudata *max_perf_cpu = NULL; + unsigned int max_cap_perf = 0; + int cpunum; + + for_each_online_cpu(cpunum) { + struct cpudata *cpu = all_cpu_data[cpunum]; + + if (!cpu) + continue; + + /* + * During initialization, CPU performance at full capacity needs + * to be determined. + */ + if (!hybrid_max_perf_cpu) + hybrid_get_capacity_perf(cpu); + + /* + * If hybrid_max_perf_cpu is not NULL at this point, it is + * being replaced, so don't take it into account when looking + * for the new one. + */ + if (cpu == hybrid_max_perf_cpu) + continue; + + if (cpu->capacity_perf > max_cap_perf) { + max_cap_perf = cpu->capacity_perf; + max_perf_cpu = cpu; + } + } + + if (max_perf_cpu) { + hybrid_max_perf_cpu = max_perf_cpu; + hybrid_set_capacity_of_cpus(); + } else { + pr_info("Found no CPUs with nonzero maximum performance\n"); + /* Revert to the flat CPU capacity structure. */ + for_each_online_cpu(cpunum) + hybrid_clear_cpu_capacity(cpunum); + } +} + +static void __hybrid_init_cpu_capacity_scaling(void) +{ + hybrid_max_perf_cpu = NULL; + hybrid_update_cpu_capacity_scaling(); +} + +static void hybrid_init_cpu_capacity_scaling(void) +{ + bool disable_itmt = false; + + mutex_lock(&hybrid_capacity_lock); + + /* + * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity + * scaling has been enabled already and the driver is just changing the + * operation mode. + */ + if (hybrid_max_perf_cpu) { + __hybrid_init_cpu_capacity_scaling(); + goto unlock; + } + + /* + * On hybrid systems, use asym capacity instead of ITMT, but because + * the capacity of SMT threads is not deterministic even approximately, + * do not do that when SMT is in use. + */ + if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) { + __hybrid_init_cpu_capacity_scaling(); + disable_itmt = true; + } + +unlock: + mutex_unlock(&hybrid_capacity_lock); + + /* + * Disabling ITMT causes sched domains to be rebuilt to disable asym + * packing and enable asym capacity. + */ + if (disable_itmt) + sched_clear_itmt_support(); +} + static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; @@ -962,6 +1099,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu) } } +static void hybrid_update_capacity(struct cpudata *cpu) +{ + unsigned int max_cap_perf; + + mutex_lock(&hybrid_capacity_lock); + + if (!hybrid_max_perf_cpu) + goto unlock; + + /* + * The maximum performance of the CPU may have changed, but assume + * that the performance of the other CPUs has not changed. + */ + max_cap_perf = hybrid_max_perf_cpu->capacity_perf; + + intel_pstate_get_hwp_cap(cpu); + + hybrid_get_capacity_perf(cpu); + /* Should hybrid_max_perf_cpu be replaced by this CPU? */ + if (cpu->capacity_perf > max_cap_perf) { + hybrid_max_perf_cpu = cpu; + hybrid_set_capacity_of_cpus(); + goto unlock; + } + + /* If this CPU is hybrid_max_perf_cpu, should it be replaced? */ + if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) { + hybrid_update_cpu_capacity_scaling(); + goto unlock; + } + + hybrid_set_cpu_capacity(cpu); + +unlock: + mutex_unlock(&hybrid_capacity_lock); +} + static void intel_pstate_hwp_set(unsigned int cpu) { struct cpudata *cpu_data = all_cpu_data[cpu]; @@ -1070,6 +1244,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + + mutex_lock(&hybrid_capacity_lock); + + if (!hybrid_max_perf_cpu) { + mutex_unlock(&hybrid_capacity_lock); + + return; + } + + if (hybrid_max_perf_cpu == cpu) + hybrid_update_cpu_capacity_scaling(); + + mutex_unlock(&hybrid_capacity_lock); + + /* Reset the capacity of the CPU going offline to the initial value. */ + hybrid_clear_cpu_capacity(cpu->cpu); } #define POWER_CTL_EE_ENABLE 1 @@ -1165,21 +1355,46 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata, static void intel_pstate_update_limits(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpudata *cpudata; if (!policy) return; - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + cpudata = all_cpu_data[cpu]; + + __intel_pstate_update_max_freq(cpudata, policy); + + /* Prevent the driver from being unregistered now. */ + mutex_lock(&intel_pstate_driver_lock); cpufreq_cpu_release(policy); + + hybrid_update_capacity(cpudata); + + mutex_unlock(&intel_pstate_driver_lock); } static void intel_pstate_update_limits_for_all(void) { int cpu; - for_each_possible_cpu(cpu) - intel_pstate_update_limits(cpu); + for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + + if (!policy) + continue; + + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + + cpufreq_cpu_release(policy); + } + + mutex_lock(&hybrid_capacity_lock); + + if (hybrid_max_perf_cpu) + __hybrid_init_cpu_capacity_scaling(); + + mutex_unlock(&hybrid_capacity_lock); } /************************** sysfs begin ************************/ @@ -1618,6 +1833,13 @@ static void intel_pstate_notify_work(struct work_struct *work) __intel_pstate_update_max_freq(cpudata, policy); cpufreq_cpu_release(policy); + + /* + * The driver will not be unregistered while this function is + * running, so update the capacity without acquiring the driver + * lock. + */ + hybrid_update_capacity(cpudata); } wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); @@ -2034,8 +2256,10 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (pstate_funcs.get_cpu_scaling) { cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); - if (cpu->pstate.scaling != perf_ctl_scaling) + if (cpu->pstate.scaling != perf_ctl_scaling) { intel_pstate_hybrid_hwp_adjust(cpu); + hwp_is_hybrid = true; + } } else { cpu->pstate.scaling = perf_ctl_scaling; } @@ -2707,6 +2931,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy) */ intel_pstate_hwp_reenable(cpu); cpu->suspended = false; + + hybrid_update_capacity(cpu); } return 0; @@ -3147,6 +3373,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) global.min_perf_pct = min_perf_pct_min(); + hybrid_init_cpu_capacity_scaling(); + return 0; } -- cgit From dd7c445beb7baf0ed071aba94341857b07fde69b Mon Sep 17 00:00:00 2001 From: "Yo-Jung (Leo) Lin" <0xff07@gmail.com> Date: Sun, 25 Aug 2024 17:53:50 +0800 Subject: pm-graph: Make git ignore sleepgraph.py artifacts By default, sleepgraph.py creates suspend-{date}-{time} directories to store artifacts, or suspend-{date}-{time}-xN if the --multi option is used. Ignore those directories by adding a .gitignore file. Signed-off-by: Yo-Jung (Leo) Lin <0xff07@gmail.com> Acked-by: Todd Brandt Link: https://patch.msgid.link/20240825095353.7578-1-0xff07@gmail.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/power/pm-graph/.gitignore diff --git a/tools/power/pm-graph/.gitignore b/tools/power/pm-graph/.gitignore new file mode 100644 index 000000000000..37762a8a06d6 --- /dev/null +++ b/tools/power/pm-graph/.gitignore @@ -0,0 +1,3 @@ +# sleepgraph.py artifacts +suspend-[0-9]*-[0-9]* +suspend-[0-9]*-[0-9]*-x[0-9]* -- cgit From 387ce37ed57c5d92400c5cff9d5c887f95db0236 Mon Sep 17 00:00:00 2001 From: Amit Vadhavana Date: Sun, 25 Aug 2024 16:36:20 +0530 Subject: pm-graph: Update directory handling and installation process in Makefile - Standardize directory variables to support more flexible installations. - Add copyright and licensing information to the Makefile. - Introduce ".PHONY" declarations to ensure that specific targets are always executed, regardless of the presence of files with matching names. - Add a help target to provide usage instructions. Signed-off-by: Amit Vadhavana Acked-by: Todd Brandt Link: https://patch.msgid.link/Update directory handling and installation process in Makefile [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/Makefile | 111 +++++++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 38 deletions(-) diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile index b5310832c19c..aeddbaf2d4c4 100644 --- a/tools/power/pm-graph/Makefile +++ b/tools/power/pm-graph/Makefile @@ -1,51 +1,86 @@ # SPDX-License-Identifier: GPL-2.0 -PREFIX ?= /usr -DESTDIR ?= +# +# Copyright (c) 2013, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Authors: +# Todd Brandt + +# Prefix to the directories we're installing to +DESTDIR ?= + +# Directory definitions. These are default and most probably +# do not need to be changed. Please note that DESTDIR is +# added in front of any of them + +BINDIR ?= /usr/bin +MANDIR ?= /usr/share/man +LIBDIR ?= /usr/lib + +# Toolchain: what tools do we use, and what options do they need: +INSTALL = /usr/bin/install +INSTALL_DATA = ${INSTALL} -m 644 all: @echo "Nothing to build" install : uninstall - install -d $(DESTDIR)$(PREFIX)/lib/pm-graph - install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph - install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph - install -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - - install -d $(DESTDIR)$(PREFIX)/bin - ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph - ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph - - install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 - install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 + $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) sleepgraph.py $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) bootgraph.py $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/cgskip.txt $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-x2-proc.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + + $(INSTALL) -d $(DESTDIR)$(BINDIR) + ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(BINDIR)/bootgraph + ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(BINDIR)/sleepgraph + + $(INSTALL) -d $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) bootgraph.8 $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) sleepgraph.8 $(DESTDIR)$(MANDIR)/man8 uninstall : - rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8 - rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8 + rm -f $(DESTDIR)$(MANDIR)/man8/bootgraph.8 + rm -f $(DESTDIR)$(MANDIR)/man8/sleepgraph.8 - rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph - rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph + rm -f $(DESTDIR)$(BINDIR)/bootgraph + rm -f $(DESTDIR)$(BINDIR)/sleepgraph - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/config/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/config ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph/config; \ fi; - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__ ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__; \ fi; - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph; \ fi; + +help: + @echo 'Building targets:' + @echo ' all - Nothing to build' + @echo ' install - Install the program and create necessary directories' + @echo ' uninstall - Remove installed files and directories' + +.PHONY: all install uninstall help -- cgit From c62362e786f30852a13b005c2c6dafcc1537ba06 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Sep 2024 20:25:20 +0300 Subject: Documentation: PM: Discourage use of deprecated macros The Documentation refers to some deprecated macros. Update those parts accordingly. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240903172520.3568731-1-andriy.shevchenko@linux.intel.com [ rjw: Changelog edit ] Signed-off-by: Rafael J. Wysocki --- Documentation/power/pci.rst | 11 +++++------ Documentation/power/runtime_pm.rst | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst index e2c1fb8a569a..9ebecb7b00b2 100644 --- a/Documentation/power/pci.rst +++ b/Documentation/power/pci.rst @@ -979,18 +979,17 @@ subsections can be defined as a separate function, it often is convenient to point two or more members of struct dev_pm_ops to the same routine. There are a few convenience macros that can be used for this purpose. -The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one +The DEFINE_SIMPLE_DEV_PM_OPS() declares a struct dev_pm_ops object with one suspend routine pointed to by the .suspend(), .freeze(), and .poweroff() members and one resume routine pointed to by the .resume(), .thaw(), and .restore() members. The other function pointers in this struct dev_pm_ops are unset. -The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it -additionally sets the .runtime_resume() pointer to the same value as -.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to -the same value as .suspend() (and .freeze() and .poweroff()). +The DEFINE_RUNTIME_DEV_PM_OPS() is similar to DEFINE_SIMPLE_DEV_PM_OPS(), but it +additionally sets the .runtime_resume() pointer to pm_runtime_force_resume() +and the .runtime_suspend() pointer to pm_runtime_force_suspend(). -The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct +The SYSTEM_SLEEP_PM_OPS() can be used inside of a declaration of struct dev_pm_ops to indicate that one suspend routine is to be pointed to by the .suspend(), .freeze(), and .poweroff() members and one resume routine is to be pointed to by the .resume(), .thaw(), and .restore() members. diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 5c4e730f38d0..53d1996460ab 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -811,8 +811,8 @@ subsystem-level dev_pm_ops structure. Device drivers that wish to use the same function as a system suspend, freeze, poweroff and runtime suspend callback, and similarly for system resume, thaw, -restore, and runtime resume, can achieve this with the help of the -UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its +restore, and runtime resume, can achieve similar behaviour with the help of the +DEFINE_RUNTIME_DEV_PM_OPS() defined in include/linux/pm_runtime.h (possibly setting its last argument to NULL). 8. "No-Callback" Devices -- cgit From 84e927aa679f7c72f4814ec02768bb99e4b7c5ec Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 Aug 2024 07:58:25 -0600 Subject: opp: ti: Drop unnecessary of_match_ptr() of_match_ptr() is not necessary as the driver is always enabled for DT. Signed-off-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- drivers/opp/ti-opp-supply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c index ec0056a4bb13..5f0fb3ea385b 100644 --- a/drivers/opp/ti-opp-supply.c +++ b/drivers/opp/ti-opp-supply.c @@ -405,7 +405,7 @@ static struct platform_driver ti_opp_supply_driver = { .probe = ti_opp_supply_probe, .driver = { .name = "ti_opp_supply", - .of_match_table = of_match_ptr(ti_opp_supply_of_match), + .of_match_table = ti_opp_supply_of_match, }, }; module_platform_driver(ti_opp_supply_driver); -- cgit From a84372012e9329daba8082efac98f3bc0b443aa2 Mon Sep 17 00:00:00 2001 From: Dhruva Gole Date: Tue, 3 Sep 2024 16:00:08 +0530 Subject: dt-bindings: opp: operating-points-v2-ti-cpu: Update maintainers Update the maintainers entry since I will be taking over this file from now. Signed-off-by: Dhruva Gole Acked-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml index 02d1d2c17129..fd0c8d5c5f3e 100644 --- a/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml +++ b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml @@ -19,7 +19,7 @@ description: the hardware description for the scheme mentioned above. maintainers: - - Nishanth Menon + - Dhruva Gole allOf: - $ref: opp-v2-base.yaml# -- cgit From 3f66425a4fc8663ad074cf70f432c681953bfcb7 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 Aug 2024 07:58:26 -0600 Subject: cpufreq: Enable COMPILE_TEST on Arm drivers COMPILE_TEST is useful for build testing without requiring a specific architecture's compiler. Enable it for most of the Arm CPUFreq drivers. As Kconfig.arm is only included on ARM and ARM64, COMPILE_TEST is only enabled for those architectures until that is dropped. Signed-off-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 50 +++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 96b404ce829f..5f7e13e60c80 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -5,7 +5,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM tristate "Allwinner nvmem based SUN50I CPUFreq driver" - depends on ARCH_SUNXI + depends on ARCH_SUNXI || COMPILE_TEST depends on NVMEM_SUNXI_SID select PM_OPP help @@ -26,15 +26,17 @@ config ARM_APPLE_SOC_CPUFREQ config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" - depends on ARCH_MVEBU && CPUFREQ_DT + depends on ARCH_MVEBU || COMPILE_TEST + depends on CPUFREQ_DT help This adds the CPUFreq driver support for Marvell Armada 37xx SoCs. The Armada 37xx PMU supports 4 frequency and VDD levels. config ARM_ARMADA_8K_CPUFREQ tristate "Armada 8K CPUFreq driver" - depends on ARCH_MVEBU && CPUFREQ_DT - select ARMADA_AP_CPU_CLK + depends on ARCH_MVEBU || COMPILE_TEST + depends on CPUFREQ_DT + select ARMADA_AP_CPU_CLK if COMMON_CLK help This enables the CPUFreq driver support for Marvell Armada8k SOCs. @@ -56,7 +58,7 @@ config ARM_SCPI_CPUFREQ config ARM_VEXPRESS_SPC_CPUFREQ tristate "Versatile Express SPC based CPUfreq driver" depends on ARM_CPU_TOPOLOGY && HAVE_CLK - depends on ARCH_VEXPRESS_SPC + depends on ARCH_VEXPRESS_SPC || COMPILE_TEST select PM_OPP help This add the CPUfreq driver support for Versatile Express @@ -75,7 +77,8 @@ config ARM_BRCMSTB_AVS_CPUFREQ config ARM_HIGHBANK_CPUFREQ tristate "Calxeda Highbank-based" - depends on ARCH_HIGHBANK && CPUFREQ_DT && REGULATOR + depends on ARCH_HIGHBANK || COMPILE_TEST + depends on CPUFREQ_DT && REGULATOR && PL320_MBOX default m help This adds the CPUFreq driver for Calxeda Highbank SoC @@ -96,7 +99,8 @@ config ARM_IMX6Q_CPUFREQ config ARM_IMX_CPUFREQ_DT tristate "Freescale i.MX8M cpufreq support" - depends on ARCH_MXC && CPUFREQ_DT + depends on CPUFREQ_DT + depends on ARCH_MXC || COMPILE_TEST help This adds cpufreq driver support for Freescale i.MX7/i.MX8M series SoCs, based on cpufreq-dt. @@ -111,7 +115,8 @@ config ARM_KIRKWOOD_CPUFREQ config ARM_MEDIATEK_CPUFREQ tristate "CPU Frequency scaling support for MediaTek SoCs" - depends on ARCH_MEDIATEK && REGULATOR + depends on ARCH_MEDIATEK || COMPILE_TEST + depends on REGULATOR select PM_OPP help This adds the CPUFreq driver support for MediaTek SoCs. @@ -130,12 +135,12 @@ config ARM_MEDIATEK_CPUFREQ_HW config ARM_OMAP2PLUS_CPUFREQ bool "TI OMAP2+" - depends on ARCH_OMAP2PLUS + depends on ARCH_OMAP2PLUS || COMPILE_TEST default ARCH_OMAP2PLUS config ARM_QCOM_CPUFREQ_NVMEM tristate "Qualcomm nvmem based CPUFreq" - depends on ARCH_QCOM + depends on ARCH_QCOM || COMPILE_TEST depends on NVMEM_QCOM_QFPROM depends on QCOM_SMEM select PM_OPP @@ -166,7 +171,7 @@ config ARM_RASPBERRYPI_CPUFREQ config ARM_S3C64XX_CPUFREQ bool "Samsung S3C64XX" - depends on CPU_S3C6410 + depends on CPU_S3C6410 || COMPILE_TEST default y help This adds the CPUFreq driver for Samsung S3C6410 SoC. @@ -175,7 +180,7 @@ config ARM_S3C64XX_CPUFREQ config ARM_S5PV210_CPUFREQ bool "Samsung S5PV210 and S5PC110" - depends on CPU_S5PV210 + depends on CPU_S5PV210 || COMPILE_TEST default y help This adds the CPUFreq driver for Samsung S5PV210 and @@ -199,14 +204,15 @@ config ARM_SCMI_CPUFREQ config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" - depends on PLAT_SPEAR + depends on PLAT_SPEAR || COMPILE_TEST default y help This adds the CPUFreq driver support for SPEAr SOCs. config ARM_STI_CPUFREQ tristate "STi CPUFreq support" - depends on CPUFREQ_DT && SOC_STIH407 + depends on CPUFREQ_DT + depends on SOC_STIH407 || COMPILE_TEST help This driver uses the generic OPP framework to match the running platform with a predefined set of suitable values. If not provided @@ -216,34 +222,38 @@ config ARM_STI_CPUFREQ config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" - depends on ARCH_TEGRA && CPUFREQ_DT + depends on ARCH_TEGRA || COMPILE_TEST + depends on CPUFREQ_DT default y help This adds the CPUFreq driver support for Tegra20/30 SOCs. config ARM_TEGRA124_CPUFREQ bool "Tegra124 CPUFreq support" - depends on ARCH_TEGRA && CPUFREQ_DT + depends on ARCH_TEGRA || COMPILE_TEST + depends on CPUFREQ_DT default y help This adds the CPUFreq driver support for Tegra124 SOCs. config ARM_TEGRA186_CPUFREQ tristate "Tegra186 CPUFreq support" - depends on ARCH_TEGRA && TEGRA_BPMP + depends on ARCH_TEGRA || COMPILE_TEST + depends on TEGRA_BPMP help This adds the CPUFreq driver support for Tegra186 SOCs. config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" - depends on ARCH_TEGRA_194_SOC && TEGRA_BPMP + depends on ARCH_TEGRA_194_SOC || (64BIT && COMPILE_TEST) + depends on TEGRA_BPMP default y help This adds CPU frequency driver support for Tegra194 SOCs. config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" - depends on ARCH_OMAP2PLUS || ARCH_K3 + depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST default y help This driver enables valid OPPs on the running platform based on @@ -255,7 +265,7 @@ config ARM_TI_CPUFREQ config ARM_PXA2xx_CPUFREQ tristate "Intel PXA2xx CPUfreq driver" - depends on PXA27x || PXA25x + depends on PXA27x || PXA25x || COMPILE_TEST help This add the CPUFreq driver support for Intel PXA2xx SOCs. -- cgit From 81746019b9fbb9fbf7c522dcbeefb572ac0f9458 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 Aug 2024 07:58:27 -0600 Subject: cpufreq: Drop CONFIG_ARM and CONFIG_ARM64 dependency on Arm drivers The CONFIG_ARM and CONFIG_ARM64 dependency is redundant as all the drivers have necessary sub-arch dependency and don't depend on the architecture support. Signed-off-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 10cda6f2fe1d..2561b215432a 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -231,9 +231,7 @@ if X86 source "drivers/cpufreq/Kconfig.x86" endif -if ARM || ARM64 source "drivers/cpufreq/Kconfig.arm" -endif if PPC32 || PPC64 source "drivers/cpufreq/Kconfig.powerpc" -- cgit From 49243adc715e6ae34d6cc003827e63bcf5b3a21d Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Fri, 9 Aug 2024 06:08:16 +0000 Subject: cpufreq/amd-pstate: Add the missing cpufreq_cpu_put() Fix the reference counting of cpufreq_policy object in amd_pstate_update() function by adding the missing cpufreq_cpu_put(). Fixes: e8f555daacd3 ("cpufreq/amd-pstate: fix setting policy current frequency value") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Perry Yuan Signed-off-by: Viresh Kumar --- drivers/cpufreq/amd-pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 68c616b572f2..eff039ba49ee 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -554,12 +554,15 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, } if (value == prev) - return; + goto cpufreq_policy_put; WRITE_ONCE(cpudata->cppc_req_cached, value); amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, fast_switch); + +cpufreq_policy_put: + cpufreq_cpu_put(policy); } static int amd_pstate_verify(struct cpufreq_policy_data *policy) -- cgit From 24f9bfb847b7340b91e35742adf0ab87440e7079 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 9 Aug 2024 11:24:38 -0600 Subject: cpufreq: Fix warning on unused of_device_id tables for !CONFIG_OF !CONFIG_OF builds cause warnings on unused of_device_id tables. This is due to of_match_node() being a macro rather than static inline function. Add a __maybe_unused annotation to the of_device_id tables. Fixes: c7582ec85342 ("cpufreq: Drop CONFIG_ARM and CONFIG_ARM64 dependency on Arm drivers") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408090714.wcrqU6Pk-lkp@intel.com/ Signed-off-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 2 +- drivers/cpufreq/mediatek-cpufreq.c | 2 +- drivers/cpufreq/qcom-cpufreq-nvmem.c | 2 +- drivers/cpufreq/ti-cpufreq.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index af34c22fa273..4dcacab9b4bf 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -85,7 +85,7 @@ static const struct apple_soc_cpufreq_info soc_default_info = { .cur_pstate_mask = 0, /* fallback */ }; -static const struct of_device_id apple_soc_cpufreq_of_match[] = { +static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { { .compatible = "apple,t8103-cluster-cpufreq", .data = &soc_t8103_info, diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 3a1aadaa723c..663f61565cf7 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -738,7 +738,7 @@ static const struct mtk_cpufreq_platform_data mt8516_platform_data = { }; /* List of machines supported by this driver */ -static const struct of_device_id mtk_cpufreq_machines[] __initconst = { +static const struct of_device_id mtk_cpufreq_machines[] __initconst __maybe_unused = { { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt7622", .data = &mt7622_platform_data }, diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 939702dfa73f..703308fb891a 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -611,7 +611,7 @@ static struct platform_driver qcom_cpufreq_driver = { }, }; -static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { +static const struct of_device_id qcom_cpufreq_match_list[] __initconst __maybe_unused = { { .compatible = "qcom,apq8096", .data = &match_data_kryo }, { .compatible = "qcom,msm8909", .data = &match_data_msm8909 }, { .compatible = "qcom,msm8996", .data = &match_data_kryo }, diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 4d3f27958fbd..220fff7a302e 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -419,7 +419,7 @@ static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data) return 0; } -static const struct of_device_id ti_cpufreq_of_match[] = { +static const struct of_device_id ti_cpufreq_of_match[] __maybe_unused = { { .compatible = "ti,am33xx", .data = &am3x_soc_data, }, { .compatible = "ti,am3517", .data = &am3517_soc_data, }, { .compatible = "ti,am43", .data = &am4x_soc_data, }, -- cgit From b14ceb82c3a1b6f1422af4ea7a2a686dbaf0a094 Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Thu, 8 Aug 2024 21:40:17 +0300 Subject: cpufreq: Add SM7325 to cpufreq-dt-platdev blocklist The Qualcomm SM7325 platform uses the qcom-cpufreq-hw driver, so add it to the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Danila Tikhonov Reviewed-by: Dmitry Baryshkov Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index cac379ba006d..18942bfe9c95 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -166,6 +166,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm6375", }, { .compatible = "qcom,sm7225", }, + { .compatible = "qcom,sm7325", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, { .compatible = "qcom,sm8350", }, -- cgit From 5493f9714e4cdaf0ee7cec15899a231400cb1a9f Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Mon, 26 Aug 2024 16:38:41 +0300 Subject: cpufreq: amd-pstate: add check for cpufreq_cpu_get's return value cpufreq_cpu_get may return NULL. To avoid NULL-dereference check it and return in case of error. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Anastasia Belova Reviewed-by: Perry Yuan Signed-off-by: Viresh Kumar --- drivers/cpufreq/amd-pstate.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index eff039ba49ee..f4b9fef3342b 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -659,7 +659,12 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long max_perf, min_perf, des_perf, cap_perf, lowest_nonlinear_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + + if (!policy) + return; + + cpudata = policy->driver_data; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); @@ -873,11 +878,16 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) static void amd_pstate_update_limits(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; int ret; bool highest_perf_changed = false; + if (!policy) + return; + + cpudata = policy->driver_data; + mutex_lock(&amd_pstate_driver_lock); if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) goto free_cpufreq_put; -- cgit From 2b7ec33e534f7a10033a5cf07794acf48b182bbe Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 28 Aug 2024 14:24:59 +0800 Subject: cpufreq: loongson3: Use raw_smp_processor_id() in do_service_request() Use raw_smp_processor_id() instead of plain smp_processor_id() in do_service_request(), otherwise we may get some errors with the driver enabled: BUG: using smp_processor_id() in preemptible [00000000] code: (udev-worker)/208 caller is loongson3_cpufreq_probe+0x5c/0x250 [loongson3_cpufreq] Reported-by: Xi Ruoyao Tested-by: Binbin Zhou Signed-off-by: Huacai Chen Signed-off-by: Viresh Kumar --- drivers/cpufreq/loongson3_cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/loongson3_cpufreq.c b/drivers/cpufreq/loongson3_cpufreq.c index 5f79b6de127c..6b5e6798d9a2 100644 --- a/drivers/cpufreq/loongson3_cpufreq.c +++ b/drivers/cpufreq/loongson3_cpufreq.c @@ -176,7 +176,7 @@ static DEFINE_PER_CPU(struct loongson3_freq_data *, freq_data); static inline int do_service_request(u32 id, u32 info, u32 cmd, u32 val, u32 extra) { int retries; - unsigned int cpu = smp_processor_id(); + unsigned int cpu = raw_smp_processor_id(); unsigned int package = cpu_data[cpu].package; union smc_message msg, last; -- cgit From abc00ffda43bd4ba85896713464c7510c39f8165 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 28 Aug 2024 08:19:15 -0500 Subject: cpufreq: ti-cpufreq: Introduce quirks to handle syscon fails appropriately Commit b4bc9f9e27ed ("cpufreq: ti-cpufreq: add support for omap34xx and omap36xx") introduced special handling for OMAP3 class devices where syscon node may not be present. However, this also creates a bug where the syscon node is present, however the offset used to read is beyond the syscon defined range. Fix this by providing a quirk option that is populated when such special handling is required. This allows proper failure for all other platforms when the syscon node and efuse offsets are mismatched. Fixes: b4bc9f9e27ed ("cpufreq: ti-cpufreq: add support for omap34xx and omap36xx") Signed-off-by: Nishanth Menon Tested-by: Dhruva Gole Reviewed-by: Kevin Hilman Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 220fff7a302e..804329e81eb8 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -90,6 +90,9 @@ struct ti_cpufreq_soc_data { unsigned long efuse_shift; unsigned long rev_offset; bool multi_regulator; +/* Backward compatibility hack: Might have missing syscon */ +#define TI_QUIRK_SYSCON_MAY_BE_MISSING 0x1 + u8 quirks; }; struct ti_cpufreq_data { @@ -254,6 +257,7 @@ static struct ti_cpufreq_soc_data omap34xx_soc_data = { .efuse_mask = BIT(3), .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE, .multi_regulator = false, + .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; /* @@ -281,6 +285,7 @@ static struct ti_cpufreq_soc_data omap36xx_soc_data = { .efuse_mask = BIT(9), .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE, .multi_regulator = true, + .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; /* @@ -295,6 +300,7 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { .efuse_mask = 0, .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE, .multi_regulator = false, + .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; static struct ti_cpufreq_soc_data am625_soc_data = { @@ -340,7 +346,7 @@ static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data, ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset, &efuse); - if (ret == -EIO) { + if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_MAY_BE_MISSING && ret == -EIO) { /* not a syscon register! */ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE + opp_data->soc_data->efuse_offset, 4); @@ -381,7 +387,7 @@ static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data, ret = regmap_read(opp_data->syscon, opp_data->soc_data->rev_offset, &revision); - if (ret == -EIO) { + if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_MAY_BE_MISSING && ret == -EIO) { /* not a syscon register! */ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE + opp_data->soc_data->rev_offset, 4); -- cgit From 87fa4bd351a51ec1eb2d4854f4112c7811fef519 Mon Sep 17 00:00:00 2001 From: Liu Jing Date: Mon, 2 Sep 2024 16:28:16 +0800 Subject: cpufreq: Fix the cacography in powernv-cpufreq.c The word 'swtich' is wrong, so fix it. Signed-off-by: Liu Jing Signed-off-by: Viresh Kumar --- drivers/cpufreq/powernv-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 50c62929f7ca..c1b6280de5dc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -692,7 +692,7 @@ static void gpstate_timer_handler(struct timer_list *t) } /* - * If PMCR was last updated was using fast_swtich then + * If PMCR was last updated was using fast_switch then * We may have wrong in gpstate->last_lpstate_idx * value. Hence, read from PMCR to get correct data. */ -- cgit From 6b612d1bac67b0f483fde7779a45f6310274d4eb Mon Sep 17 00:00:00 2001 From: Dhruva Gole Date: Mon, 2 Sep 2024 14:51:35 +0530 Subject: cpufreq: ti-cpufreq: Use socinfo to get revision in AM62 family In the AM62x, AM62Ax, and AM62Px devices, we already have the revision info within the k3-socinfo driver. Hence, re-use this information from there instead of re using the offset for 2 drivers trying to get the same information ie. revision. Signed-off-by: Dhruva Gole Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 804329e81eb8..ba621ce1cdda 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -16,6 +16,7 @@ #include #include #include +#include #define REVISION_MASK 0xF #define REVISION_SHIFT 28 @@ -303,6 +304,13 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; +static const struct soc_device_attribute k3_cpufreq_soc[] = { + { .family = "AM62X", .revision = "SR1.0" }, + { .family = "AM62AX", .revision = "SR1.0" }, + { .family = "AM62PX", .revision = "SR1.0" }, + { /* sentinel */ } +}; + static struct ti_cpufreq_soc_data am625_soc_data = { .efuse_xlate = am625_efuse_xlate, .efuse_offset = 0x0018, @@ -384,6 +392,16 @@ static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data, struct device *dev = opp_data->cpu_dev; u32 revision; int ret; + if (soc_device_match(k3_cpufreq_soc)) { + /* + * Since the SR is 1.0, hard code the revision_value as + * 0x1 here. This way we avoid re using the same register + * that is giving us required information inside socinfo + * anyway. + */ + *revision_value = 0x1; + goto done; + } ret = regmap_read(opp_data->syscon, opp_data->soc_data->rev_offset, &revision); @@ -406,6 +424,7 @@ static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data, *revision_value = BIT((revision >> REVISION_SHIFT) & REVISION_MASK); +done: return 0; } -- cgit From 53e4e2b51727d325cd13feddd1d8d3d9be3df884 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 5 Jun 2024 11:18:18 -0700 Subject: PM/devfreq: governor: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/devfreq/governor_simpleondemand.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/devfreq/governor_performance.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/devfreq/governor_powersave.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/devfreq/governor_userspace.o Add all missing invocations of the MODULE_DESCRIPTION() macro. Link: https://lore.kernel.org/lkml/20240605-md-drivers-devfreq-v1-1-d01ae91b907e@quicinc.com/ Signed-off-by: Jeff Johnson Signed-off-by: Chanwoo Choi --- drivers/devfreq/governor_performance.c | 1 + drivers/devfreq/governor_powersave.c | 1 + drivers/devfreq/governor_simpleondemand.c | 1 + drivers/devfreq/governor_userspace.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c index 5dbc1e56ec08..2e4e981446fa 100644 --- a/drivers/devfreq/governor_performance.c +++ b/drivers/devfreq/governor_performance.c @@ -58,4 +58,5 @@ static void __exit devfreq_performance_exit(void) return; } module_exit(devfreq_performance_exit); +MODULE_DESCRIPTION("DEVFREQ Performance governor"); MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c index 4746af2435b0..f059e8814804 100644 --- a/drivers/devfreq/governor_powersave.c +++ b/drivers/devfreq/governor_powersave.c @@ -58,4 +58,5 @@ static void __exit devfreq_powersave_exit(void) return; } module_exit(devfreq_powersave_exit); +MODULE_DESCRIPTION("DEVFREQ Powersave governor"); MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index d57b82a2b570..c23435736367 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -140,4 +140,5 @@ static void __exit devfreq_simple_ondemand_exit(void) return; } module_exit(devfreq_simple_ondemand_exit); +MODULE_DESCRIPTION("DEVFREQ Simple On-demand governor"); MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c index d69672ccacc4..d1aa6806b683 100644 --- a/drivers/devfreq/governor_userspace.c +++ b/drivers/devfreq/governor_userspace.c @@ -153,4 +153,5 @@ static void __exit devfreq_userspace_exit(void) return; } module_exit(devfreq_userspace_exit); +MODULE_DESCRIPTION("DEVFREQ Userspace governor"); MODULE_LICENSE("GPL"); -- cgit From 629277b7f575792ad24cc5fd8e708aaea2a5584c Mon Sep 17 00:00:00 2001 From: Anand Moon Date: Fri, 10 May 2024 15:10:24 +0530 Subject: PM / devfreq: exynos: Use Use devm_clk_get_enabled() helpers The devm_clk_get_enabled() helpers: - call devm_clk_get() - call clk_prepare_enable() and register what is needed in order to call clk_disable_unprepare() when needed, as a managed resource. This simplifies the code and avoids the calls to clk_disable_unprepare(). While at it, use dev_err_probe consistently, and use its return value to return the error code. Link: https://lore.kernel.org/lkml/20240510094034.12493-1-linux.amoon@gmail.com/ Signed-off-by: Anand Moon Signed-off-by: Chanwoo Choi --- drivers/devfreq/exynos-bus.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 00118580905a..7d06c476d8e9 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -160,7 +160,6 @@ static void exynos_bus_exit(struct device *dev) platform_device_unregister(bus->icc_pdev); dev_pm_opp_of_remove_table(dev); - clk_disable_unprepare(bus->clk); dev_pm_opp_put_regulators(bus->opp_token); } @@ -171,7 +170,6 @@ static void exynos_bus_passive_exit(struct device *dev) platform_device_unregister(bus->icc_pdev); dev_pm_opp_of_remove_table(dev); - clk_disable_unprepare(bus->clk); } static int exynos_bus_parent_parse_of(struct device_node *np, @@ -247,23 +245,16 @@ static int exynos_bus_parse_of(struct device_node *np, int ret; /* Get the clock to provide each bus with source clock */ - bus->clk = devm_clk_get(dev, "bus"); - if (IS_ERR(bus->clk)) { - dev_err(dev, "failed to get bus clock\n"); - return PTR_ERR(bus->clk); - } - - ret = clk_prepare_enable(bus->clk); - if (ret < 0) { - dev_err(dev, "failed to get enable clock\n"); - return ret; - } + bus->clk = devm_clk_get_enabled(dev, "bus"); + if (IS_ERR(bus->clk)) + return dev_err_probe(dev, PTR_ERR(bus->clk), + "failed to get bus clock\n"); /* Get the freq and voltage from OPP table to scale the bus freq */ ret = dev_pm_opp_of_add_table(dev); if (ret < 0) { dev_err(dev, "failed to get OPP table\n"); - goto err_clk; + return ret; } rate = clk_get_rate(bus->clk); @@ -281,8 +272,6 @@ static int exynos_bus_parse_of(struct device_node *np, err_opp: dev_pm_opp_of_remove_table(dev); -err_clk: - clk_disable_unprepare(bus->clk); return ret; } @@ -453,7 +442,6 @@ static int exynos_bus_probe(struct platform_device *pdev) err: dev_pm_opp_of_remove_table(dev); - clk_disable_unprepare(bus->clk); err_reg: dev_pm_opp_put_regulators(bus->opp_token); -- cgit From d47552124bb0b9527da7a95357ae7d2e6046c4f6 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 13:12:49 -0600 Subject: PM / devfreq: imx-bus: Use of_property_present() Use of_property_present() to test for property presence rather than of_get_property(). This is part of a larger effort to remove callers of of_get_property() and similar functions. of_get_property() leaks the DT property data pointer which is a problem for dynamically allocated nodes which may be freed. Link: https://lore.kernel.org/lkml/20240731191312.1710417-11-robh@kernel.org/ Acked-by: Peng Fan Signed-off-by: Rob Herring (Arm) Signed-off-by: Chanwoo Choi --- drivers/devfreq/imx-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c index 86850b7dea09..49798f542d68 100644 --- a/drivers/devfreq/imx-bus.c +++ b/drivers/devfreq/imx-bus.c @@ -59,7 +59,7 @@ static int imx_bus_init_icc(struct device *dev) struct imx_bus *priv = dev_get_drvdata(dev); const char *icc_driver_name; - if (!of_get_property(dev->of_node, "#interconnect-cells", NULL)) + if (!of_property_present(dev->of_node, "#interconnect-cells")) return 0; if (!IS_ENABLED(CONFIG_INTERCONNECT_IMX)) { dev_warn(dev, "imx interconnect drivers disabled\n"); -- cgit From c3e093efbc6cac7bf9dc531dcb751b86daaa65b0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 1 Sep 2024 00:00:35 -0500 Subject: cpufreq/amd-pstate: Catch failures for amd_pstate_epp_update_limit() amd_pstate_set_epp() calls cppc_set_epp_perf() which can fail for a variety of reasons but this is ignored. Change the return flow to allow failures. Reviewed-by: Perry Yuan Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 89bda7a2bb8d..b5fcde1bed32 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1555,7 +1555,7 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) pr_debug("CPU %d exiting\n", policy->cpu); } -static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) +static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; u32 max_perf, min_perf, min_limit_perf, max_limit_perf; @@ -1605,7 +1605,7 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) * This return value can only be negative for shared_memory * systems where EPP register read/write not supported. */ - return; + return epp; } if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) @@ -1618,12 +1618,13 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) } WRITE_ONCE(cpudata->cppc_req_cached, value); - amd_pstate_set_epp(cpudata, epp); + return amd_pstate_set_epp(cpudata, epp); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + int ret; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -1633,7 +1634,9 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) cpudata->policy = policy->policy; - amd_pstate_epp_update_limit(policy); + ret = amd_pstate_epp_update_limit(policy); + if (ret) + return ret; /* * policy->cur is never updated with the amd_pstate_epp driver, but it -- cgit From 4b80294fb53845dc5c98cca0c989da09150f2ca9 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Wed, 4 Sep 2024 22:19:08 -0400 Subject: pm:cpupower: Add missing powercap_set_enabled() stub function There was a symbol listed in the powercap.h file that was not implemented. Implement it with a stub return of 0. Programs like SWIG require that functions that are defined in the headers be implemented. Fixes: c2294c1496b7 ("cpupower: Introduce powercap intel-rapl library and powercap-info command") Suggested-by: Shuah Khan Signed-off-by: John B. Wyatt IV Signed-off-by: John B. Wyatt IV Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/powercap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c index a7a59c6bacda..94a0c69e55ef 100644 --- a/tools/power/cpupower/lib/powercap.c +++ b/tools/power/cpupower/lib/powercap.c @@ -77,6 +77,14 @@ int powercap_get_enabled(int *mode) return sysfs_get_enabled(path, mode); } +/* + * TODO: implement function. Returns dummy 0 for now. + */ +int powercap_set_enabled(int mode) +{ + return 0; +} + /* * Hardcoded, because rapl is the only powercap implementation - * this needs to get more generic if more powercap implementations -- cgit From 338f490e07bc9e83c6fe60125ed9dea3e5d6a45d Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Wed, 4 Sep 2024 22:19:09 -0400 Subject: pm:cpupower: Add SWIG bindings files for libcpupower SWIG is a tool packaged in Fedora and other distros that can generate bindings from C and C++ code for several languages including Python, Perl, and Go. These bindings allows users to easily write scripts that use and extend libcpupower's functionality. Currently, only Python is provided in the makefile, but additional languages may be added if there is demand. Added suggestions from Shuah Khan for the README and license discussion. Note that while SWIG itself is GPL v3+ licensed; the resulting output, the bindings code, is permissively licensed + the license of the .o files. Please see https://swig.org/legal.html and [1] for more details. [1] https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/ Suggested-by: Shuah Khan Signed-off-by: John B. Wyatt IV Signed-off-by: John B. Wyatt IV Signed-off-by: Shuah Khan --- tools/power/cpupower/bindings/python/.gitignore | 8 + tools/power/cpupower/bindings/python/Makefile | 31 +++ tools/power/cpupower/bindings/python/README | 59 +++++ .../cpupower/bindings/python/raw_pylibcpupower.i | 247 +++++++++++++++++++++ 4 files changed, 345 insertions(+) create mode 100644 tools/power/cpupower/bindings/python/.gitignore create mode 100644 tools/power/cpupower/bindings/python/Makefile create mode 100644 tools/power/cpupower/bindings/python/README create mode 100644 tools/power/cpupower/bindings/python/raw_pylibcpupower.i diff --git a/tools/power/cpupower/bindings/python/.gitignore b/tools/power/cpupower/bindings/python/.gitignore new file mode 100644 index 000000000000..5c9a1f0212dd --- /dev/null +++ b/tools/power/cpupower/bindings/python/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +raw_pylibcpupower_wrap.c +*.o +*.so +*.py +!test_raw_pylibcpupower.py +# git keeps ignoring this file, use git add -f raw_libcpupower.i +!raw_pylibcpupower.i diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile new file mode 100644 index 000000000000..d0418f902795 --- /dev/null +++ b/tools/power/cpupower/bindings/python/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile for libcpupower's Python bindings +# +# This Makefile expects you have already run the makefile for cpupower to build +# the .o files in the lib directory for the bindings to be created. + +CC=gcc + +LIB_DIR = ../../lib +BIND_DIR = . +PY_INCLUDE := $(firstword $(shell python-config --includes)) +#PY_INCLUDE = $(shell python-config --includes | awk '{ print $1 }') + +OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) +OBJECTS_BIND = $(wildcard $(BIND_DIR)/*.o) + +all: _raw_pylibcpupower.so + +_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o + $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so # raw_pylibcpupower_wrap.o +# $(CC) -shared $(OBJECTS_BIND) $(OBJECTS_LIB) -o _raw_pylibcpupower.so # raw_pylibcpupower_wrap.o + +raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c + $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) + +raw_pylibcpupower_wrap.c: raw_pylibcpupower.i + swig -python raw_pylibcpupower.i + +# Will only clean the bindings folder; will not clean the actual cpupower folder +clean: + rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README new file mode 100644 index 000000000000..0a4bb2581e8a --- /dev/null +++ b/tools/power/cpupower/bindings/python/README @@ -0,0 +1,59 @@ +This folder contains the necessary files to build the Python bindings for +libcpupower (aside from the libcpupower object files). + + +requirements +------------ + +* You need the object files in the libcpupower directory compiled by +cpupower's makefile. +* The SWIG program must be installed. +* The Python's development libraries installed. + +Please check that your version of SWIG is compatible with the version of Python +installed on your machine by checking the SWIG changelog on their website. +https://swig.org/ + +Note that while SWIG itself is GPL v3+ licensed; the resulting output, +the bindings code: is permissively licensed + the license of libcpupower's .o +files. For these bindings that means GPL v2. + +Please see https://swig.org/legal.html and the discussion [1] for more details. + +[1] +https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/ + + +build +----- + +Install SWIG and the Python development files provided by your distribution. + +Build the object files for libcpupower by running make in the cpupower +directory. + +Return to the directory this README is in to run: + +$ make + + +testing +------- + +Please verify the _raw_pylibcpupower.so and raw_pylibcpupower.py files have +been created. + +To run the test script: + +$ python test_raw_pylibcpupower.py + + +credits +------- + +Original Bindings Author: +John B. Wyatt IV +jwyatt@redhat.com +sageofredondo@gmail.com + +Copyright (C) 2024 Red Hat diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.i b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i new file mode 100644 index 000000000000..96556d87a745 --- /dev/null +++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +%module raw_pylibcpupower +%{ +#include "../../lib/cpupower_intern.h" +#include "../../lib/acpi_cppc.h" +#include "../../lib/cpufreq.h" +#include "../../lib/cpuidle.h" +#include "../../lib/cpupower.h" +#include "../../lib/powercap.h" +%} + +/* + * cpupower_intern.h + */ + +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define MAX_LINE_LEN 4096 +#define SYSFS_PATH_MAX 255 + +int is_valid_path(const char *path); + +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); + +unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen); + +/* + * acpi_cppc.h + */ + +enum acpi_cppc_value { + HIGHEST_PERF, + LOWEST_PERF, + NOMINAL_PERF, + LOWEST_NONLINEAR_PERF, + LOWEST_FREQ, + NOMINAL_FREQ, + REFERENCE_PERF, + WRAPAROUND_TIME, + MAX_CPPC_VALUE_FILES +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, + enum acpi_cppc_value which); + +/* + * cpufreq.h + */ + +struct cpufreq_policy { + unsigned long min; + unsigned long max; + char *governor; +}; + +struct cpufreq_available_governors { + char *governor; + struct cpufreq_available_governors *next; + struct cpufreq_available_governors *first; +}; + +struct cpufreq_available_frequencies { + unsigned long frequency; + struct cpufreq_available_frequencies *next; + struct cpufreq_available_frequencies *first; +}; + + +struct cpufreq_affected_cpus { + unsigned int cpu; + struct cpufreq_affected_cpus *next; + struct cpufreq_affected_cpus *first; +}; + +struct cpufreq_stats { + unsigned long frequency; + unsigned long long time_in_state; + struct cpufreq_stats *next; + struct cpufreq_stats *first; +}; + +unsigned long cpufreq_get_freq_kernel(unsigned int cpu); + +unsigned long cpufreq_get_freq_hardware(unsigned int cpu); + +#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu); + +unsigned long cpufreq_get_transition_latency(unsigned int cpu); + +int cpufreq_get_hardware_limits(unsigned int cpu, + unsigned long *min, + unsigned long *max); + +char *cpufreq_get_driver(unsigned int cpu); + +void cpufreq_put_driver(char *ptr); + +struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu); + +void cpufreq_put_policy(struct cpufreq_policy *policy); + +struct cpufreq_available_governors +*cpufreq_get_available_governors(unsigned int cpu); + +void cpufreq_put_available_governors( + struct cpufreq_available_governors *first); + +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu); + +void cpufreq_put_available_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu); + +void cpufreq_put_boost_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned + int cpu); + +void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first); + +struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned + int cpu); + +void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first); + +struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, + unsigned long long *total_time); + +void cpufreq_put_stats(struct cpufreq_stats *stats); + +unsigned long cpufreq_get_transitions(unsigned int cpu); + +int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); + +int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); + +int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq); + +int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); + +int cpufreq_set_frequency(unsigned int cpu, + unsigned long target_frequency); + +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size); + +/* + * cpuidle.h + */ + +int cpuidle_is_state_disabled(unsigned int cpu, + unsigned int idlestate); +int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, + unsigned int disable); +unsigned long cpuidle_state_latency(unsigned int cpu, + unsigned int idlestate); +unsigned long cpuidle_state_usage(unsigned int cpu, + unsigned int idlestate); +unsigned long long cpuidle_state_time(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_name(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_desc(unsigned int cpu, + unsigned int idlestate); +unsigned int cpuidle_state_count(unsigned int cpu); + +char *cpuidle_get_governor(void); + +char *cpuidle_get_driver(void); + +/* + * cpupower.h + */ + +struct cpupower_topology { + /* Amount of CPU cores, packages and threads per core in the system */ + unsigned int cores; + unsigned int pkgs; + unsigned int threads; /* per core */ + + /* Array gets mallocated with cores entries, holding per core info */ + struct cpuid_core_info *core_info; +}; + +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; + +int get_cpu_topology(struct cpupower_topology *cpu_top); + +void cpu_topology_release(struct cpupower_topology cpu_top); + +int cpupower_is_cpu_online(unsigned int cpu); + +/* + * powercap.h + */ + +struct powercap_zone { + char name[MAX_LINE_LEN]; + /* + * sys_name relative to PATH_TO_POWERCAP, + * do not forget the / in between + */ + char sys_name[SYSFS_PATH_MAX]; + int tree_depth; + struct powercap_zone *parent; + struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES]; + /* More possible caps or attributes to be added? */ + uint32_t has_power_uw:1, + has_energy_uj:1; + +}; + +int powercap_walk_zones(struct powercap_zone *zone, + int (*f)(struct powercap_zone *zone)); + +struct powercap_zone *powercap_init_zones(void); + +int powercap_get_enabled(int *mode); + +int powercap_set_enabled(int mode); + +int powercap_get_driver(char *driver, int buflen); + +int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val); + +int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode); + +int powercap_zone_set_enabled(struct powercap_zone *zone, int mode); -- cgit From 660475266b744ab02695c6f3cdf28b120b884125 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Wed, 4 Sep 2024 22:19:10 -0400 Subject: pm:cpupower: Include test_raw_pylibcpupower.py This script demonstrates how to make use of, and tests, the bindings. In the future, this script could become part of a larger test suite to test the bindings and libcpupower. Signed-off-by: John B. Wyatt IV Signed-off-by: John B. Wyatt IV Signed-off-by: Shuah Khan --- .../bindings/python/test_raw_pylibcpupower.py | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py new file mode 100755 index 000000000000..3d6f62b9556a --- /dev/null +++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only + +import raw_pylibcpupower as p + +# Simple function call + +""" +Get cstate count +""" +cpu_cstates_count = p.cpuidle_state_count(0) +if cpu_cstates_count > -1: + print(f"CPU 0 has {cpu_cstates_count} c-states") +else: + print(f"cstate count error: return code: {cpu_cstates_count}") + +""" +Disable cstate (will fail if the above is 0, ex: a virtual machine) +""" +cstate_disabled = p.cpuidle_state_disable(0, 0, 1) +if cpu_cstates_count == 0: + print(f"CPU 0 has {cpu_cstates_count} c-states") +else: + print(f"cstate count error: return code: {cpu_cstates_count}") + +match cstate_disabled: + case 0: + print(f"CPU state disabled") + case -1: + print(f"Idlestate not available") + case _: + print(f"Not documented") + + +# Pointer example + +topo = p.cpupower_topology() +total_cpus = p.get_cpu_topology(topo) +if total_cpus > 0: + print(f"Number of total cpus: {total_cpus} and number of cores: {topo.cores}") +else: + print(f"Error: could not get cpu topology") -- cgit From 757eebc10839de8b16a29192382584a5e5a36173 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Wed, 4 Sep 2024 22:19:11 -0400 Subject: MAINTAINERS: Add Maintainers for SWIG Python bindings Adding myself as the primary maintainer and John Kacur as the backup maintainer for the libcpupower SWIG generated Python bindings. Suggested-by: Shuah Khan Signed-off-by: John B. Wyatt IV Signed-off-by: John B. Wyatt IV Signed-off-by: Shuah Khan --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..d3b661e5496e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5819,6 +5819,9 @@ CPU POWER MONITORING SUBSYSTEM M: Thomas Renninger M: Shuah Khan M: Shuah Khan +M: John B. Wyatt IV +M: John B. Wyatt IV +M: John Kacur L: linux-pm@vger.kernel.org S: Maintained F: tools/power/cpupower/ -- cgit From 80e67f1802d0fc21543216557a68320c71d7dbe1 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Fri, 6 Sep 2024 09:00:06 -0400 Subject: pm:cpupower: Add error warning when SWIG is not installed Add error message to better explain to the user when SWIG and python-config is missing from the path. Makefile was cleaned up and unneeded elements were removed. Suggested-by: Shuah Khan Signed-off-by: John B. Wyatt IV Signed-off-by: John B. Wyatt IV Signed-off-by: Shuah Khan --- tools/power/cpupower/bindings/python/Makefile | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index d0418f902795..dc09c5b66ead 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -4,26 +4,28 @@ # This Makefile expects you have already run the makefile for cpupower to build # the .o files in the lib directory for the bindings to be created. -CC=gcc - -LIB_DIR = ../../lib -BIND_DIR = . -PY_INCLUDE := $(firstword $(shell python-config --includes)) -#PY_INCLUDE = $(shell python-config --includes | awk '{ print $1 }') +CC := gcc +HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) +HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) +LIB_DIR := ../../lib +PY_INCLUDE = $(firstword $(shell python-config --includes)) OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) -OBJECTS_BIND = $(wildcard $(BIND_DIR)/*.o) all: _raw_pylibcpupower.so _raw_pylibcpupower.so: raw_pylibcpupower_wrap.o - $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so # raw_pylibcpupower_wrap.o -# $(CC) -shared $(OBJECTS_BIND) $(OBJECTS_LIB) -o _raw_pylibcpupower.so # raw_pylibcpupower_wrap.o + $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) raw_pylibcpupower_wrap.c: raw_pylibcpupower.i +ifeq ($(HAVE_SWIG),0) + $(error "swig was not found. Make sure you have it installed and in the PATH to generate the bindings.") +else ifeq ($(HAVE_PYCONFIG),0) + $(error "python-config was not found. Make sure you have it installed and in the PATH to generate the bindings.") +endif swig -python raw_pylibcpupower.i # Will only clean the bindings folder; will not clean the actual cpupower folder -- cgit From c3565a35d97102fbea69e324086d5e33ee2ab34e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 5 Sep 2024 21:48:48 +0300 Subject: PM: hibernate: Remove unused stub for saveable_highmem_page() When saveable_highmem_page() is unused, it prevents kernel builds with clang, `make W=1` and CONFIG_WERROR=y: kernel/power/snapshot.c:1369:21: error: unused function 'saveable_highmem_page' [-Werror,-Wunused-function] 1369 | static inline void *saveable_highmem_page(struct zone *z, unsigned long p) | ^~~~~~~~~~~~~~~~~~~~~ Fix this by removing unused stub. See also commit 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build"). Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240905184848.318978-1-andriy.shevchenko@linux.intel.com Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 405eddbda4fc..30894d8f0a78 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1365,11 +1365,6 @@ static unsigned int count_highmem_pages(void) } return n; } -#else -static inline void *saveable_highmem_page(struct zone *z, unsigned long p) -{ - return NULL; -} #endif /* CONFIG_HIGHMEM */ /** -- cgit From 2bcec09cc4ae62229e149213499e45b74190a24a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:51 -0500 Subject: x86/amd: Move amd_get_highest_perf() from amd.c to cppc.c To prepare to let amd_get_highest_perf() detect preferred cores it will require CPPC functions. Move amd_get_highest_perf() to cppc.c to prepare for 'preferred core detection' rework. No functional changes intended. Reviewed-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/kernel/acpi/cppc.c | 16 ++++++++++++++++ arch/x86/kernel/cpu/amd.c | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index ff8f25faca3d..7ec8f2ce859c 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -116,3 +116,19 @@ void init_freq_invariance_cppc(void) init_done = true; mutex_unlock(&freq_invariance_lock); } + +u32 amd_get_highest_perf(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || + (c->x86_model >= 0x70 && c->x86_model < 0x80))) + return 166; + + if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || + (c->x86_model >= 0x40 && c->x86_model < 0x70))) + return 166; + + return 255; +} +EXPORT_SYMBOL_GPL(amd_get_highest_perf); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index be5889bded49..1be1f913434f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1190,22 +1190,6 @@ unsigned long amd_get_dr_addr_mask(unsigned int dr) } EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask); -u32 amd_get_highest_perf(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || - (c->x86_model >= 0x70 && c->x86_model < 0x80))) - return 166; - - if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || - (c->x86_model >= 0x40 && c->x86_model < 0x70))) - return 166; - - return 255; -} -EXPORT_SYMBOL_GPL(amd_get_highest_perf); - static void zenbleed_check_cpu(void *unused) { struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); -- cgit From 01ced022e125f7b328335bb2944a107afcafe351 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 3 Sep 2024 15:32:16 -0500 Subject: ACPI: CPPC: Adjust return code for inline functions in !CONFIG_ACPI_CPPC_LIB Checkpath emits the following warning: ``` WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP ``` Adjust the code accordingly. Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 930b6afba6f4..409a7190a4a8 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -162,31 +162,31 @@ extern int cppc_set_auto_sel(int cpu, bool enable); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_enable(int cpu, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool cppc_perf_ctrs_in_pcc(void) { @@ -210,27 +210,27 @@ static inline bool cpc_ffh_supported(void) } static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_auto_sel(int cpu, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #endif /* !CONFIG_ACPI_CPPC_LIB */ -- cgit From 6c09e3b445a1a647a5b57ea6afd23e846225dd8f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:52 -0500 Subject: x86/amd: Rename amd_get_highest_perf() to amd_get_boost_ratio_numerator() The function name is ambiguous because it returns an intermediate value for calculating maximum frequency rather than the CPPC 'Highest Perf' register. Rename the function to clarify its use and allow the function to return errors. Adjust the consumer in acpi-cpufreq to catch errors. Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/include/asm/processor.h | 3 --- arch/x86/kernel/acpi/cppc.c | 44 +++++++++++++++++++++++++++++----------- drivers/cpufreq/acpi-cpufreq.c | 12 ++++++++--- include/acpi/cppc_acpi.h | 5 +++++ 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a75a07f4931f..775acbdea1a9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -691,8 +691,6 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu) } #ifdef CONFIG_CPU_SUP_AMD -extern u32 amd_get_highest_perf(void); - /* * Issue a DIV 0/1 insn to clear any division data from previous DIV * operations. @@ -705,7 +703,6 @@ static __always_inline void amd_clear_divider(void) extern void amd_check_microcode(void); #else -static inline u32 amd_get_highest_perf(void) { return 0; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } #endif diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 7ec8f2ce859c..660cfeb6384b 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -69,7 +69,7 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) static void amd_set_max_freq_ratio(void) { struct cppc_perf_caps perf_caps; - u64 highest_perf, nominal_perf; + u64 numerator, nominal_perf; u64 perf_ratio; int rc; @@ -79,15 +79,19 @@ static void amd_set_max_freq_ratio(void) return; } - highest_perf = amd_get_highest_perf(); + rc = amd_get_boost_ratio_numerator(0, &numerator); + if (rc) { + pr_debug("Could not retrieve highest performance (%d)\n", rc); + return; + } nominal_perf = perf_caps.nominal_perf; - if (!highest_perf || !nominal_perf) { - pr_debug("Could not retrieve highest or nominal performance\n"); + if (!nominal_perf) { + pr_debug("Could not retrieve nominal performance\n"); return; } - perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); + perf_ratio = div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf); /* midpoint between max_boost and max_P */ perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; if (!perf_ratio) { @@ -117,18 +121,34 @@ void init_freq_invariance_cppc(void) mutex_unlock(&freq_invariance_lock); } -u32 amd_get_highest_perf(void) +/** + * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation + * @cpu: CPU to get numerator for. + * @numerator: Output variable for numerator. + * + * Determine the numerator to use for calculating the boost ratio on + * a CPU. On systems that support preferred cores, this will be a hardcoded + * value. On other systems this will the highest performance register value. + * + * Return: 0 for success, negative error code otherwise. + */ +int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) { struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || - (c->x86_model >= 0x70 && c->x86_model < 0x80))) - return 166; + (c->x86_model >= 0x70 && c->x86_model < 0x80))) { + *numerator = 166; + return 0; + } if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || - (c->x86_model >= 0x40 && c->x86_model < 0x70))) - return 166; + (c->x86_model >= 0x40 && c->x86_model < 0x70))) { + *numerator = 166; + return 0; + } + *numerator = 255; - return 255; + return 0; } -EXPORT_SYMBOL_GPL(amd_get_highest_perf); +EXPORT_SYMBOL_GPL(amd_get_boost_ratio_numerator); diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index a8ca625a98b8..0f04feb6cafa 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -642,10 +642,16 @@ static u64 get_max_boost_ratio(unsigned int cpu) return 0; } - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - highest_perf = amd_get_highest_perf(); - else + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + ret = amd_get_boost_ratio_numerator(cpu, &highest_perf); + if (ret) { + pr_debug("CPU%d: Unable to get boost ratio numerator (%d)\n", + cpu, ret); + return 0; + } + } else { highest_perf = perf_caps.highest_perf; + } nominal_perf = perf_caps.nominal_perf; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 409a7190a4a8..97861abc5f5b 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,6 +159,7 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -232,6 +233,10 @@ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf { return -EOPNOTSUPP; } +static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) +{ + return -EOPNOTSUPP; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ -- cgit From 3355ac2541052154b6ca0b1263be5bf49dfa0158 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 27 Aug 2024 13:50:45 -0500 Subject: ACPI: CPPC: Drop check for non zero perf ratio perf_ratio is a u64 and SCHED_CAPACITY_SCALE is a large number. Shifting by one will never have a zero value. Drop the check. Suggested-by: Gautham R. Shenoy Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/kernel/acpi/cppc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 660cfeb6384b..e65c77afab31 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -91,13 +91,8 @@ static void amd_set_max_freq_ratio(void) return; } - perf_ratio = div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf); /* midpoint between max_boost and max_P */ - perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; - if (!perf_ratio) { - pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); - return; - } + perf_ratio = (div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf) + SCHED_CAPACITY_SCALE) >> 1; freq_invariance_set_perf_ratio(perf_ratio, false); } -- cgit From 21fb59ab4b9767085f4fe1edbdbe3177fbb9ec97 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:53 -0500 Subject: ACPI: CPPC: Adjust debug messages in amd_set_max_freq_ratio() to warn If the boost ratio isn't calculated properly for the system for any reason this can cause other problems that are non-obvious. Raise all messages to warn instead. Suggested-by: Perry Yuan Reviewed-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/kernel/acpi/cppc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index e65c77afab31..f0328ce98a8f 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -75,19 +75,19 @@ static void amd_set_max_freq_ratio(void) rc = cppc_get_perf_caps(0, &perf_caps); if (rc) { - pr_debug("Could not retrieve perf counters (%d)\n", rc); + pr_warn("Could not retrieve perf counters (%d)\n", rc); return; } rc = amd_get_boost_ratio_numerator(0, &numerator); if (rc) { - pr_debug("Could not retrieve highest performance (%d)\n", rc); + pr_warn("Could not retrieve highest performance (%d)\n", rc); return; } nominal_perf = perf_caps.nominal_perf; if (!nominal_perf) { - pr_debug("Could not retrieve nominal performance\n"); + pr_warn("Could not retrieve nominal performance\n"); return; } -- cgit From 2819bfef6483c66c55064ca678f2630a1a09f3f9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:54 -0500 Subject: x86/amd: Move amd_get_highest_perf() out of amd-pstate amd_pstate_get_highest_perf() is a helper used to get the highest perf value on AMD systems. It's used in amd-pstate as part of preferred core handling, but applicable for acpi-cpufreq as well. Move it out to cppc handling code as amd_get_highest_perf(). Reviewed-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/kernel/acpi/cppc.c | 30 ++++++++++++++++++++++++++++++ drivers/cpufreq/amd-pstate.c | 34 ++-------------------------------- include/acpi/cppc_acpi.h | 5 +++++ 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index f0328ce98a8f..a75dcb382c78 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -116,6 +116,36 @@ void init_freq_invariance_cppc(void) mutex_unlock(&freq_invariance_lock); } +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address for highest performance value. + * + * Return: 0 for success, negative error code otherwise. + */ +int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) +{ + u64 val; + int ret; + + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val); + if (ret) + goto out; + + val = AMD_CPPC_HIGHEST_PERF(val); + } else { + ret = cppc_get_highest_perf(cpu, &val); + if (ret) + goto out; + } + + WRITE_ONCE(*highest_perf, (u32)val); +out: + return ret; +} +EXPORT_SYMBOL_GPL(amd_get_highest_perf); + /** * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation * @cpu: CPU to get numerator for. diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index f4b9fef3342b..8da567913f63 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -811,36 +811,6 @@ static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) } static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); -/* - * Get the highest performance register value. - * @cpu: CPU from which to get highest performance. - * @highest_perf: Return address. - * - * Return: 0 for success, -EIO otherwise. - */ -static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) -{ - int ret; - - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - u64 cap1; - - ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); - if (ret) - return ret; - WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); - } else { - u64 cppc_highest_perf; - - ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); - if (ret) - return ret; - WRITE_ONCE(*highest_perf, cppc_highest_perf); - } - - return (ret); -} - #define CPPC_MAX_PERF U8_MAX static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) @@ -848,7 +818,7 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) int ret, prio; u32 highest_perf; - ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + ret = amd_get_highest_perf(cpudata->cpu, &highest_perf); if (ret) return; @@ -892,7 +862,7 @@ static void amd_pstate_update_limits(unsigned int cpu) if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) goto free_cpufreq_put; - ret = amd_pstate_get_highest_perf(cpu, &cur_high); + ret = amd_get_highest_perf(cpu, &cur_high); if (ret) goto free_cpufreq_put; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 97861abc5f5b..f7c7abf2a95e 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,6 +159,7 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) @@ -233,6 +234,10 @@ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf { return -EOPNOTSUPP; } +static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) +{ + return -ENODEV; +} static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) { return -EOPNOTSUPP; -- cgit From 279f838a61f96cbfeb1f9ba060e4a452e6e041d0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:55 -0500 Subject: x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator() AMD systems that support preferred cores will use "166" as their numerator for max frequency calculations instead of "255". Add a function for detecting preferred cores by looking at the highest perf value on all cores. If preferred cores are enabled return 166 and if disabled the value in the highest perf register. As the function will be called multiple times, cache the values for the boost numerator and if preferred cores will be enabled in global variables. Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/kernel/acpi/cppc.c | 93 +++++++++++++++++++++++++++++++++++++++----- drivers/cpufreq/amd-pstate.c | 34 +++++++--------- include/acpi/cppc_acpi.h | 5 +++ 3 files changed, 101 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index a75dcb382c78..df367bc35930 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -9,6 +9,16 @@ #include #include +#define CPPC_HIGHEST_PERF_PREFCORE 166 + +enum amd_pref_core { + AMD_PREF_CORE_UNKNOWN = 0, + AMD_PREF_CORE_SUPPORTED, + AMD_PREF_CORE_UNSUPPORTED, +}; +static enum amd_pref_core amd_pref_core_detected; +static u64 boost_numerator; + /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ bool cpc_supported_by_cpu(void) @@ -146,6 +156,66 @@ out: } EXPORT_SYMBOL_GPL(amd_get_highest_perf); +/** + * amd_detect_prefcore: Detect if CPUs in the system support preferred cores + * @detected: Output variable for the result of the detection. + * + * Determine whether CPUs in the system support preferred cores. On systems + * that support preferred cores, different highest perf values will be found + * on different cores. On other systems, the highest perf value will be the + * same on all cores. + * + * The result of the detection will be stored in the 'detected' parameter. + * + * Return: 0 for success, negative error code otherwise + */ +int amd_detect_prefcore(bool *detected) +{ + int cpu, count = 0; + u64 highest_perf[2] = {0}; + + if (WARN_ON(!detected)) + return -EINVAL; + + switch (amd_pref_core_detected) { + case AMD_PREF_CORE_SUPPORTED: + *detected = true; + return 0; + case AMD_PREF_CORE_UNSUPPORTED: + *detected = false; + return 0; + default: + break; + } + + for_each_present_cpu(cpu) { + u32 tmp; + int ret; + + ret = amd_get_highest_perf(cpu, &tmp); + if (ret) + return ret; + + if (!count || (count == 1 && tmp != highest_perf[0])) + highest_perf[count++] = tmp; + + if (count == 2) + break; + } + + *detected = (count == 2); + boost_numerator = highest_perf[0]; + + amd_pref_core_detected = *detected ? AMD_PREF_CORE_SUPPORTED : + AMD_PREF_CORE_UNSUPPORTED; + + pr_debug("AMD CPPC preferred core is %ssupported (highest perf: 0x%llx)\n", + *detected ? "" : "un", highest_perf[0]); + + return 0; +} +EXPORT_SYMBOL_GPL(amd_detect_prefcore); + /** * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation * @cpu: CPU to get numerator for. @@ -155,24 +225,27 @@ EXPORT_SYMBOL_GPL(amd_get_highest_perf); * a CPU. On systems that support preferred cores, this will be a hardcoded * value. On other systems this will the highest performance register value. * + * If booting the system with amd-pstate enabled but preferred cores disabled then + * the correct boost numerator will be returned to match hardware capabilities + * even if the preferred cores scheduling hints are not enabled. + * * Return: 0 for success, negative error code otherwise. */ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) { - struct cpuinfo_x86 *c = &boot_cpu_data; + bool prefcore; + int ret; - if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || - (c->x86_model >= 0x70 && c->x86_model < 0x80))) { - *numerator = 166; - return 0; - } + ret = amd_detect_prefcore(&prefcore); + if (ret) + return ret; - if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || - (c->x86_model >= 0x40 && c->x86_model < 0x70))) { - *numerator = 166; + /* without preferred cores, return the highest perf register value */ + if (!prefcore) { + *numerator = boost_numerator; return 0; } - *numerator = 255; + *numerator = CPPC_HIGHEST_PERF_PREFCORE; return 0; } diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8da567913f63..937a3a817743 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -815,32 +815,18 @@ static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) { - int ret, prio; - u32 highest_perf; - - ret = amd_get_highest_perf(cpudata->cpu, &highest_perf); - if (ret) + /* user disabled or not detected */ + if (!amd_pstate_prefcore) return; cpudata->hw_prefcore = true; - /* check if CPPC preferred core feature is enabled*/ - if (highest_perf < CPPC_MAX_PERF) - prio = (int)highest_perf; - else { - pr_debug("AMD CPPC preferred core is unsupported!\n"); - cpudata->hw_prefcore = false; - return; - } - - if (!amd_pstate_prefcore) - return; /* * The priorities can be set regardless of whether or not * sched_set_itmt_support(true) has been called and it is valid to * update them at any time after it has been called. */ - sched_set_itmt_core_prio(prio, cpudata->cpu); + sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu); schedule_work(&sched_prefcore_work); } @@ -1011,12 +997,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; - amd_pstate_init_prefcore(cpudata); - ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_freq(cpudata); if (ret) goto free_cpudata1; @@ -1466,12 +1452,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; - amd_pstate_init_prefcore(cpudata); - ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_freq(cpudata); if (ret) goto free_cpudata1; @@ -1916,6 +1902,12 @@ static int __init amd_pstate_init(void) static_call_update(amd_pstate_update_perf, cppc_update_perf); } + if (amd_pstate_prefcore) { + ret = amd_detect_prefcore(&amd_pstate_prefcore); + if (ret) + return ret; + } + /* enable amd pstate feature */ ret = amd_pstate_enable(true); if (ret) { diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f7c7abf2a95e..482e0587a041 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -161,6 +161,7 @@ extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); +extern int amd_detect_prefcore(bool *detected); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -242,6 +243,10 @@ static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator { return -EOPNOTSUPP; } +static inline int amd_detect_prefcore(bool *detected) +{ + return -ENODEV; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ -- cgit From ad4caad58d91d3293880f8074f7ad125490ce636 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:56 -0500 Subject: cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator() The special case in amd_pstate_highest_perf_set() is the value used for calculating the boost numerator. Merge this into amd_get_boost_ratio_numerator() and then use that to calculate boost ratio. This allows dropping more special casing of the highest perf value. Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- Documentation/admin-guide/pm/amd-pstate.rst | 3 +- arch/x86/kernel/acpi/cppc.c | 16 +++++++++ drivers/cpufreq/amd-pstate.c | 52 ++++++----------------------- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index d0324d44f548..e13915c54064 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -251,7 +251,8 @@ performance supported in `AMD CPPC Performance Capability `_). In some ASICs, the highest CPPC performance is not the one in the ``_CPC`` table, so we need to expose it to sysfs. If boost is not active, but still supported, this maximum frequency will be larger than the one in -``cpuinfo``. +``cpuinfo``. On systems that support preferred core, the driver will have +different values for some cores than others. This attribute is read-only. ``amd_pstate_lowest_nonlinear_freq`` diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index df367bc35930..956984054bf3 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -9,6 +9,7 @@ #include #include +#define CPPC_HIGHEST_PERF_PERFORMANCE 196 #define CPPC_HIGHEST_PERF_PREFCORE 166 enum amd_pref_core { @@ -245,6 +246,21 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) *numerator = boost_numerator; return 0; } + + /* + * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f, + * the highest performance level is set to 196. + * https://bugzilla.kernel.org/show_bug.cgi?id=218759 + */ + if (cpu_feature_enabled(X86_FEATURE_ZEN4)) { + switch (boot_cpu_data.x86_model) { + case 0x70 ... 0x7f: + *numerator = CPPC_HIGHEST_PERF_PERFORMANCE; + return 0; + default: + break; + } + } *numerator = CPPC_HIGHEST_PERF_PREFCORE; return 0; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 937a3a817743..89623203f918 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -52,8 +52,6 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 -#define CPPC_HIGHEST_PERF_PERFORMANCE 196 -#define CPPC_HIGHEST_PERF_DEFAULT 166 #define AMD_CPPC_EPP_PERFORMANCE 0x00 #define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 @@ -372,43 +370,17 @@ static inline int amd_pstate_enable(bool enable) return static_call(amd_pstate_enable)(enable); } -static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - /* - * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f, - * the highest performance level is set to 196. - * https://bugzilla.kernel.org/show_bug.cgi?id=218759 - */ - if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f)) - return CPPC_HIGHEST_PERF_PERFORMANCE; - - return CPPC_HIGHEST_PERF_DEFAULT; -} - static int pstate_init_perf(struct amd_cpudata *cpudata) { u64 cap1; - u32 highest_perf; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; - /* For platforms that do not support the preferred core feature, the - * highest_pef may be configured with 166 or 255, to avoid max frequency - * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as - * the default max perf. - */ - if (cpudata->hw_prefcore) - highest_perf = amd_pstate_highest_perf_set(cpudata); - else - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); - - WRITE_ONCE(cpudata->highest_perf, highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, highest_perf); + WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); @@ -420,19 +392,13 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) static int cppc_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; - u32 highest_perf; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - if (cpudata->hw_prefcore) - highest_perf = amd_pstate_highest_perf_set(cpudata); - else - highest_perf = cppc_perf.highest_perf; - - WRITE_ONCE(cpudata->highest_perf, highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, highest_perf); + WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf); + WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); @@ -918,8 +884,8 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu) static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { int ret; - u32 min_freq; - u32 highest_perf, max_freq; + u32 min_freq, max_freq; + u64 numerator; u32 nominal_perf, nominal_freq; u32 lowest_nonlinear_perf, lowest_nonlinear_freq; u32 boost_ratio, lowest_nonlinear_ratio; @@ -941,8 +907,10 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) nominal_perf = READ_ONCE(cpudata->nominal_perf); - highest_perf = READ_ONCE(cpudata->highest_perf); - boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf); max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); -- cgit From 45722e777fd99ea863fe653c1838d39f678506e2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:57 -0500 Subject: cpufreq: amd-pstate: Optimize amd_pstate_update_limits() Don't take and release the mutex when prefcore isn't present and avoid initialization of variables that will be initially set in the function. Reviewed-by: Gautham R. Shenoy Reviewed-by: Perry Yuan Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 89623203f918..f550d31f80eb 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -810,17 +810,17 @@ static void amd_pstate_update_limits(unsigned int cpu) cpudata = policy->driver_data; - mutex_lock(&amd_pstate_driver_lock); - if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) - goto free_cpufreq_put; + if (!amd_pstate_prefcore) + return; + mutex_lock(&amd_pstate_driver_lock); ret = amd_get_highest_perf(cpu, &cur_high); if (ret) goto free_cpufreq_put; prev_high = READ_ONCE(cpudata->prefcore_ranking); - if (prev_high != cur_high) { - highest_perf_changed = true; + highest_perf_changed = (prev_high != cur_high); + if (highest_perf_changed) { WRITE_ONCE(cpudata->prefcore_ranking, cur_high); if (cur_high < CPPC_MAX_PERF) -- cgit From b96b82d1af7fbf35e3c7d50368275005bd6b6a03 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:58 -0500 Subject: cpufreq: amd-pstate: Add documentation for `amd_pstate_hw_prefcore` Explain that the sysfs file represents both preferred core being enabled by the user and supported by the hardware. Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- Documentation/admin-guide/pm/amd-pstate.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index e13915c54064..d5c050ea390d 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -263,6 +263,11 @@ lowest non-linear performance in `AMD CPPC Performance Capability `_.) This attribute is read-only. +``amd_pstate_hw_prefcore`` + +Whether the platform supports the preferred core feature and it has been +enabled. This attribute is read-only. + ``energy_performance_available_preferences`` A list of all the supported EPP preferences that could be used for -- cgit From 15a2b764ea7c16dd2b1ecfd86ba27809f5bd8580 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 3 Sep 2024 14:43:10 -0500 Subject: amd-pstate: Add missing documentation for `amd_pstate_prefcore_ranking` `amd_pstate_prefcore_ranking` reflects the dynamic rankings of a CPU core based on platform conditions. Explicitly include it in the documentation. Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- Documentation/admin-guide/pm/amd-pstate.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index d5c050ea390d..210a808b74ec 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -252,7 +252,8 @@ In some ASICs, the highest CPPC performance is not the one in the ``_CPC`` table, so we need to expose it to sysfs. If boost is not active, but still supported, this maximum frequency will be larger than the one in ``cpuinfo``. On systems that support preferred core, the driver will have -different values for some cores than others. +different values for some cores than others and this will reflect the values +advertised by the platform at bootup. This attribute is read-only. ``amd_pstate_lowest_nonlinear_freq`` @@ -268,6 +269,12 @@ This attribute is read-only. Whether the platform supports the preferred core feature and it has been enabled. This attribute is read-only. +``amd_pstate_prefcore_ranking`` + +The performance ranking of the core. This number doesn't have any unit, but +larger numbers are preferred at the time of reading. This can change at +runtime based on platform conditions. This attribute is read-only. + ``energy_performance_available_preferences`` A list of all the supported EPP preferences that could be used for -- cgit From 8d916815b0afad2a12a9f1b945c79fffc144dba8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 31 Aug 2024 21:49:11 -0500 Subject: cpufreq/amd-pstate: Export symbols for changing modes In order to effectively test all mode switch combinations export everything necessarily for amd-pstate-ut to trigger a mode switch. Reviewed-by: Perry Yuan Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 23 ++++++++++------------- drivers/cpufreq/amd-pstate.h | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index f550d31f80eb..05b39a890bfa 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -58,18 +58,6 @@ #define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF #define AMD_CPPC_EPP_POWERSAVE 0xFF -/* - * enum amd_pstate_mode - driver working mode of amd pstate - */ -enum amd_pstate_mode { - AMD_PSTATE_UNDEFINED = 0, - AMD_PSTATE_DISABLE, - AMD_PSTATE_PASSIVE, - AMD_PSTATE_ACTIVE, - AMD_PSTATE_GUIDED, - AMD_PSTATE_MAX, -}; - static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_UNDEFINED] = "undefined", [AMD_PSTATE_DISABLE] = "disable", @@ -79,6 +67,14 @@ static const char * const amd_pstate_mode_string[] = { NULL, }; +const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode) +{ + if (mode < 0 || mode >= AMD_PSTATE_MAX) + return NULL; + return amd_pstate_mode_string[mode]; +} +EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string); + struct quirk_entry { u32 nominal_freq; u32 lowest_freq; @@ -1286,7 +1282,7 @@ static ssize_t amd_pstate_show_status(char *buf) return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); } -static int amd_pstate_update_status(const char *buf, size_t size) +int amd_pstate_update_status(const char *buf, size_t size) { int mode_idx; @@ -1303,6 +1299,7 @@ static int amd_pstate_update_status(const char *buf, size_t size) return 0; } +EXPORT_SYMBOL_GPL(amd_pstate_update_status); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index cc8bb2bc325a..cd573bc6b6db 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -103,4 +103,18 @@ struct amd_cpudata { bool boost_state; }; +/* + * enum amd_pstate_mode - driver working mode of amd pstate + */ +enum amd_pstate_mode { + AMD_PSTATE_UNDEFINED = 0, + AMD_PSTATE_DISABLE, + AMD_PSTATE_PASSIVE, + AMD_PSTATE_ACTIVE, + AMD_PSTATE_GUIDED, + AMD_PSTATE_MAX, +}; +const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode); +int amd_pstate_update_status(const char *buf, size_t size); + #endif /* _LINUX_AMD_PSTATE_H */ -- cgit From 3e39e68dfbb6d4fe8c1943003d2f2a6f3255c902 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 31 Aug 2024 21:49:12 -0500 Subject: cpufreq/amd-pstate-ut: Add test case for mode switches There is a state machine in the amd-pstate driver utilized for switches for all modes. To make sure that cleanup and setup works properly for each mode add a unit test case that tries all combinations. Reviewed-by: Perry Yuan Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 66b73c308ce6..24432bd78a5a 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -54,12 +54,14 @@ static void amd_pstate_ut_acpi_cpc_valid(u32 index); static void amd_pstate_ut_check_enabled(u32 index); static void amd_pstate_ut_check_perf(u32 index); static void amd_pstate_ut_check_freq(u32 index); +static void amd_pstate_ut_check_driver(u32 index); static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid }, {"amd_pstate_ut_check_enabled", amd_pstate_ut_check_enabled }, {"amd_pstate_ut_check_perf", amd_pstate_ut_check_perf }, - {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq } + {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq }, + {"amd_pstate_ut_check_driver", amd_pstate_ut_check_driver } }; static bool get_shared_mem(void) @@ -254,6 +256,43 @@ skip_test: cpufreq_cpu_put(policy); } +static int amd_pstate_set_mode(enum amd_pstate_mode mode) +{ + const char *mode_str = amd_pstate_get_mode_string(mode); + + pr_debug("->setting mode to %s\n", mode_str); + + return amd_pstate_update_status(mode_str, strlen(mode_str)); +} + +static void amd_pstate_ut_check_driver(u32 index) +{ + enum amd_pstate_mode mode1, mode2; + int ret; + + for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) { + ret = amd_pstate_set_mode(mode1); + if (ret) + goto out; + for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) { + if (mode1 == mode2) + continue; + ret = amd_pstate_set_mode(mode2); + if (ret) + goto out; + } + } +out: + if (ret) + pr_warn("%s: failed to update status for %s->%s: %d\n", __func__, + amd_pstate_get_mode_string(mode1), + amd_pstate_get_mode_string(mode2), ret); + + amd_pstate_ut_cases[index].result = ret ? + AMD_PSTATE_UT_RESULT_FAIL : + AMD_PSTATE_UT_RESULT_PASS; +} + static int __init amd_pstate_ut_init(void) { u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases); -- cgit From 93497752dfed196b41d2804503e80b9a04318adb Mon Sep 17 00:00:00 2001 From: Qianqiang Liu Date: Wed, 11 Sep 2024 07:39:24 +0800 Subject: cpufreq/amd-pstate-ut: Fix an "Uninitialized variables" issue Using uninitialized value "mode2" when calling "amd_pstate_get_mode_string". Set "mode2" to "AMD_PSTATE_DISABLE" by default. Signed-off-by: Qianqiang Liu Link: https://lore.kernel.org/r/20240910233923.46470-1-qianqiang.liu@163.com Acked-by: Mario Limonciello Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 24432bd78a5a..d52f26d1ffba 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -267,7 +267,7 @@ static int amd_pstate_set_mode(enum amd_pstate_mode mode) static void amd_pstate_ut_check_driver(u32 index) { - enum amd_pstate_mode mode1, mode2; + enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE; int ret; for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) { -- cgit