diff options
Diffstat (limited to 'drivers/cpufreq/powernv-cpufreq.c')
| -rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 168 |
1 files changed, 92 insertions, 76 deletions
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 7e7ad3879c4e..7d9a5f656de8 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1,20 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * POWERNV cpufreq driver for the IBM POWER processors * * (C) Copyright IBM 2014 * * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #define pr_fmt(fmt) "powernv-cpufreq: " fmt @@ -28,9 +18,9 @@ #include <linux/of.h> #include <linux/reboot.h> #include <linux/slab.h> +#include <linux/string_choices.h> #include <linux/cpu.h> #include <linux/hashtable.h> -#include <trace/events/power.h> #include <asm/cputhreads.h> #include <asm/firmware.h> @@ -39,6 +29,9 @@ #include <asm/opal.h> #include <linux/timer.h> +#define CREATE_TRACE_POINTS +#include "powernv-trace.h" + #define POWERNV_MAX_PSTATES_ORDER 8 #define POWERNV_MAX_PSTATES (1UL << (POWERNV_MAX_PSTATES_ORDER)) #define PMSR_PSAFE_ENABLE (1UL << 30) @@ -46,6 +39,7 @@ #define MAX_PSTATE_SHIFT 32 #define LPSTATE_SHIFT 48 #define GPSTATE_SHIFT 56 +#define MAX_NR_CHIPS 32 #define MAX_RAMP_DOWN_TIME 5120 /* @@ -74,13 +68,14 @@ * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate_idx, Last set value of local pstate and global - * last_gpstate_idx pstate in terms of cpufreq table index + * @last_lpstate_idx: Last set value of local pstate and global + * @last_gpstate_idx: pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between * routines called by the timer handler and * governer's target_index calls + * @policy: Associated CPUFreq policy */ struct global_pstate_info { int highest_lpstate_idx; @@ -95,7 +90,7 @@ struct global_pstate_info { static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); +static DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); /** * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap * indexed by a function of pstate id. @@ -180,7 +175,7 @@ static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift) /* Use following functions for conversions between pstate_id and index */ -/** +/* * idx_to_pstate : Returns the pstate id corresponding to the * frequency in the cpufreq frequency table * powernv_freqs indexed by @i. @@ -198,7 +193,7 @@ static inline u8 idx_to_pstate(unsigned int i) return powernv_freqs[i].driver_data; } -/** +/* * pstate_to_idx : Returns the index in the cpufreq frequencytable * powernv_freqs for the frequency whose corresponding * pstate id is @pstate. @@ -244,6 +239,7 @@ static int init_powernv_pstates(void) u32 len_ids, len_freqs; u32 pstate_min, pstate_max, pstate_nominal; u32 pstate_turbo, pstate_ultra_turbo; + int rc = -ENODEV; power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); if (!power_mgt) { @@ -288,7 +284,7 @@ next: pr_info("cpufreq pstate min 0x%x nominal 0x%x max 0x%x\n", pstate_min, pstate_nominal, pstate_max); pr_info("Workload Optimized Frequency is %s in the platform\n", - (powernv_pstate_info.wof_enabled) ? "enabled" : "disabled"); + str_enabled_disabled(powernv_pstate_info.wof_enabled)); pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids); if (!pstate_ids) { @@ -327,8 +323,11 @@ next: powernv_freqs[i].frequency = freq * 1000; /* kHz */ powernv_freqs[i].driver_data = id & 0xFF; - revmap_data = (struct pstate_idx_revmap_data *) - kmalloc(sizeof(*revmap_data), GFP_KERNEL); + revmap_data = kmalloc(sizeof(*revmap_data), GFP_KERNEL); + if (!revmap_data) { + rc = -ENOMEM; + goto out; + } revmap_data->pstate_id = id & 0xFF; revmap_data->cpufreq_table_idx = i; @@ -357,7 +356,7 @@ next: return 0; out: of_node_put(power_mgt); - return -ENODEV; + return rc; } /* Returns the CPU frequency corresponding to the pstate_id. */ @@ -386,15 +385,11 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, powernv_freqs[powernv_pstate_info.nominal].frequency); } -struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = +static struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = __ATTR_RO(cpuinfo_nominal_freq); -#define SCALING_BOOST_FREQS_ATTR_INDEX 2 - static struct freq_attr *powernv_cpu_freq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, &cpufreq_freq_attr_cpuinfo_nominal_freq, - &cpufreq_freq_attr_scaling_boost_freqs, NULL, }; @@ -666,15 +661,16 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) /** * gpstate_timer_handler * - * @data: pointer to cpufreq_policy on which timer was queued + * @t: Timer context used to fetch global pstate info struct * * This handler brings down the global pstate closer to the local pstate * according quadratic equation. Queues a new timer if it is still not equal * to local pstate */ -void gpstate_timer_handler(struct timer_list *t) +static void gpstate_timer_handler(struct timer_list *t) { - struct global_pstate_info *gpstates = from_timer(gpstates, t, timer); + struct global_pstate_info *gpstates = timer_container_of(gpstates, t, + timer); struct cpufreq_policy *policy = gpstates->policy; int gpstate_idx, lpstate_idx; unsigned long val; @@ -696,7 +692,7 @@ void gpstate_timer_handler(struct timer_list *t) } /* - * If PMCR was last updated was using fast_swtich then + * If PMCR was last updated was using fast_switch then * We may have wrong in gpstate->last_lpstate_idx * value. Hence, read from PMCR to get correct data. */ @@ -809,7 +805,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, if (gpstate_idx != new_index) queue_gpstate_timer(gpstates); else - del_timer_sync(&gpstates->timer); + timer_delete_sync(&gpstates->timer); gpstates_done: freq_data.gpstate_id = idx_to_pstate(gpstate_idx); @@ -878,24 +874,33 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) return 0; } -static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy) +static void powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - /* timer is deleted in cpufreq_cpu_stop() */ - kfree(policy->driver_data); + struct powernv_smp_call_data freq_data; + struct global_pstate_info *gpstates = policy->driver_data; - return 0; + freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); + freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); + smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); + if (gpstates) + timer_delete_sync(&gpstates->timer); + + kfree(policy->driver_data); } static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb, unsigned long action, void *unused) { int cpu; - struct cpufreq_policy cpu_policy; + struct cpufreq_policy *cpu_policy; rebooting = true; for_each_online_cpu(cpu) { - cpufreq_get_policy(&cpu_policy, cpu); - powernv_cpufreq_target_index(&cpu_policy, get_nominal_index()); + cpu_policy = cpufreq_cpu_get(cpu); + if (!cpu_policy) + continue; + powernv_cpufreq_target_index(cpu_policy, get_nominal_index()); + cpufreq_cpu_put(cpu_policy); } return NOTIFY_DONE; @@ -905,13 +910,14 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; -void powernv_cpufreq_work_fn(struct work_struct *work) +static void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); + struct cpufreq_policy *policy; unsigned int cpu; cpumask_t mask; - get_online_cpus(); + cpus_read_lock(); cpumask_and(&mask, &chip->mask, cpu_online_mask); smp_call_function_any(&mask, powernv_cpufreq_throttle_check, NULL, 0); @@ -922,15 +928,17 @@ void powernv_cpufreq_work_fn(struct work_struct *work) chip->restore = false; for_each_cpu(cpu, &mask) { int index; - struct cpufreq_policy policy; - cpufreq_get_policy(&policy, cpu); - index = cpufreq_table_find_index_c(&policy, policy.cur); - powernv_cpufreq_target_index(&policy, index); - cpumask_andnot(&mask, &mask, policy.cpus); + policy = cpufreq_cpu_get(cpu); + if (!policy) + continue; + index = cpufreq_table_find_index_c(policy, policy->cur, false); + powernv_cpufreq_target_index(policy, index); + cpumask_andnot(&mask, &mask, policy->cpus); + cpufreq_cpu_put(policy); } out: - put_online_cpus(); + cpus_read_unlock(); } static int powernv_cpufreq_occ_msg(struct notifier_block *nb, @@ -1006,25 +1014,13 @@ static struct notifier_block powernv_cpufreq_opal_nb = { .priority = 0, }; -static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) -{ - struct powernv_smp_call_data freq_data; - struct global_pstate_info *gpstates = policy->driver_data; - - freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); - freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); - smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); - if (gpstates) - del_timer_sync(&gpstates->timer); -} - static unsigned int powernv_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { int index; struct powernv_smp_call_data freq_data; - index = cpufreq_table_find_index_dl(policy, target_freq); + index = cpufreq_table_find_index_dl(policy, target_freq, false); freq_data.pstate_id = powernv_freqs[index].driver_data; freq_data.gpstate_id = powernv_freqs[index].driver_data; set_pstate(&freq_data); @@ -1041,15 +1037,27 @@ static struct cpufreq_driver powernv_cpufreq_driver = { .target_index = powernv_cpufreq_target_index, .fast_switch = powernv_fast_switch, .get = powernv_cpufreq_get, - .stop_cpu = powernv_cpufreq_stop_cpu, .attr = powernv_cpu_freq_attr, }; static int init_chip_info(void) { - unsigned int chip[256]; + unsigned int *chip; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + cpumask_t *chip_cpu_mask; + int ret = 0; + + chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + /* Allocate a chip cpu mask large enough to fit mask for all chips */ + chip_cpu_mask = kcalloc(MAX_NR_CHIPS, sizeof(cpumask_t), GFP_KERNEL); + if (!chip_cpu_mask) { + ret = -ENOMEM; + goto free_and_return; + } for_each_possible_cpu(cpu) { unsigned int id = cpu_to_chip_id(cpu); @@ -1058,25 +1066,38 @@ static int init_chip_info(void) prev_chip_id = id; chip[nr_chips++] = id; } + cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]); } chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); - if (!chips) - return -ENOMEM; + if (!chips) { + ret = -ENOMEM; + goto out_free_chip_cpu_mask; + } for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; - cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); + cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); for_each_cpu(cpu, &chips[i].mask) per_cpu(chip_info, cpu) = &chips[i]; } - return 0; +out_free_chip_cpu_mask: + kfree(chip_cpu_mask); +free_and_return: + kfree(chip); + return ret; } static inline void clean_chip_info(void) { + int i; + + /* flush any pending work items */ + if (chips) + for (i = 0; i < nr_chips; i++) + cancel_work_sync(&chips[i].throttle); kfree(chips); } @@ -1105,26 +1126,20 @@ static int __init powernv_cpufreq_init(void) if (rc) goto out; - register_reboot_notifier(&powernv_cpufreq_reboot_nb); - opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); - if (powernv_pstate_info.wof_enabled) - powernv_cpufreq_driver.boost_enabled = true; - else - powernv_cpu_freq_attr[SCALING_BOOST_FREQS_ATTR_INDEX] = NULL; + powernv_cpufreq_driver.set_boost = cpufreq_boost_set_sw; rc = cpufreq_register_driver(&powernv_cpufreq_driver); if (rc) { pr_info("Failed to register the cpufreq driver (%d)\n", rc); - goto cleanup_notifiers; + goto cleanup; } - if (powernv_pstate_info.wof_enabled) - cpufreq_enable_boost_support(); + register_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); return 0; -cleanup_notifiers: - unregister_all_notifiers(); +cleanup: clean_chip_info(); out: pr_info("Platform driver disabled. System does not support PState control\n"); @@ -1140,5 +1155,6 @@ static void __exit powernv_cpufreq_exit(void) } module_exit(powernv_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for IBM/OpenPOWER powernv systems"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>"); |
