diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-03 15:59:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-03 15:59:39 -0700 |
commit | 40031da445fb4d269af9c7c445b2adf674f171e7 (patch) | |
tree | 021df7906708e939dee9978669a5461b12ff1296 /drivers/cpufreq/cpufreq.c | |
parent | dcaaaeac871ff73043c616db3b2f91482637801d (diff) | |
parent | f41b83126cba53849dd2353476a7715613af648f (diff) |
Merge tag 'pm+acpi-3.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI and power management updates from Rafael Wysocki:
1) ACPI-based PCI hotplug (ACPIPHP) subsystem rework and introduction
of Intel Thunderbolt support on systems that use ACPI for signalling
Thunderbolt hotplug events. This also should make ACPIPHP work in
some cases in which it was known to have problems. From
Rafael J Wysocki, Mika Westerberg and Kirill A Shutemov.
2) ACPI core code cleanups and dock station support cleanups from
Jiang Liu and Rafael J Wysocki.
3) Fixes for locking problems related to ACPI device hotplug from
Rafael J Wysocki.
4) ACPICA update to version 20130725 including fixes, cleanups, support
for more than 256 GPEs per GPE block and a change to make the ACPI
PM Timer optional (we've seen systems without the PM Timer in the
field already). One of the fixes, related to the DeRefOf operator,
is necessary to prevent some Windows 8 oriented AML from causing
problems. From Bob Moore, Lv Zheng, and Jung-uk Kim.
5) Removal of the old and long deprecated /proc/acpi/event interface
and related driver changes from Thomas Renninger.
6) ACPI and Xen changes to make the reduced hardware sleep work with
the latter from Ben Guthro.
7) ACPI video driver cleanups and a blacklist of systems that should
not tell the BIOS that they are compatible with Windows 8 (or ACPI
backlight and possibly other things will not work on them). From
Felipe Contreras.
8) Assorted ACPI fixes and cleanups from Aaron Lu, Hanjun Guo,
Kuppuswamy Sathyanarayanan, Lan Tianyu, Sachin Kamat, Tang Chen,
Toshi Kani, and Wei Yongjun.
9) cpufreq ondemand governor target frequency selection change to
reduce oscillations between min and max frequencies (essentially,
it causes the governor to choose target frequencies proportional
to load) from Stratos Karafotis.
10) cpufreq fixes allowing sysfs attributes file permissions to be
preserved over suspend/resume cycles from Srivatsa S Bhat.
11) Removal of Device Tree parsing for CPU device nodes from multiple
cpufreq drivers that required some changes related to
of_get_cpu_node() to be made in a few architectures and in the
driver core. From Sudeep KarkadaNagesha.
12) cpufreq core fixes and cleanups related to mutual exclusion and
driver module references from Viresh Kumar, Lukasz Majewski and
Rafael J Wysocki.
13) Assorted cpufreq fixes and cleanups from Amit Daniel Kachhap,
Bartlomiej Zolnierkiewicz, Hanjun Guo, Jingoo Han, Joseph Lo,
Julia Lawall, Li Zhong, Mark Brown, Sascha Hauer, Stephen Boyd,
Stratos Karafotis, and Viresh Kumar.
14) Fixes preventing race conditions in coupled cpuidle
from Colin Cross.
15) cpuidle core fixes and cleanups from Daniel Lezcano and
Tuukka Tikkanen.
16) Assorted cpuidle fixes and cleanups from Daniel Lezcano,
Geert Uytterhoeven, Jingoo Han, Julia Lawall, Linus Walleij,
and Sahara.
17) System sleep tracing changes from Todd E Brandt and Shuah Khan.
18) PNP subsystem conversion to using struct dev_pm_ops for power
management from Shuah Khan.
* tag 'pm+acpi-3.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (217 commits)
cpufreq: Don't use smp_processor_id() in preemptible context
cpuidle: coupled: fix race condition between pokes and safe state
cpuidle: coupled: abort idle if pokes are pending
cpuidle: coupled: disable interrupts after entering safe state
ACPI / hotplug: Remove containers synchronously
driver core / ACPI: Avoid device hot remove locking issues
cpufreq: governor: Fix typos in comments
cpufreq: governors: Remove duplicate check of target freq in supported range
cpufreq: Fix timer/workqueue corruption due to double queueing
ACPI / EC: Add ASUSTEK L4R to quirk list in order to validate ECDT
ACPI / thermal: Add check of "_TZD" availability and evaluating result
cpufreq: imx6q: Fix clock enable balance
ACPI: blacklist win8 OSI for buggy laptops
cpufreq: tegra: fix the wrong clock name
cpuidle: Change struct menu_device field types
cpuidle: Add a comment warning about possible overflow
cpuidle: Fix variable domains in get_typical_interval()
cpuidle: Fix menu_device->intervals type
cpuidle: CodingStyle: Break up multiple assignments on single line
cpuidle: Check called function parameter in get_typical_interval()
...
Diffstat (limited to 'drivers/cpufreq/cpufreq.c')
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 738 |
1 files changed, 393 insertions, 345 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f0a5e2b0eb8a..5c75e3147a60 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -17,24 +17,17 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <asm/cputime.h> -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/notifier.h> +#include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/tick.h> #include <linux/device.h> -#include <linux/slab.h> -#include <linux/cpu.h> -#include <linux/completion.h> +#include <linux/init.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/syscore_ops.h> - +#include <linux/tick.h> #include <trace/events/power.h> /** @@ -44,8 +37,10 @@ */ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); +static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback); static DEFINE_RWLOCK(cpufreq_driver_lock); static DEFINE_MUTEX(cpufreq_governor_lock); +static LIST_HEAD(cpufreq_policy_list); #ifdef CONFIG_HOTPLUG_CPU /* This one keeps track of the previously set governor of a removed CPU */ @@ -69,15 +64,14 @@ static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor); * - Lock should not be held across * __cpufreq_governor(data, CPUFREQ_GOV_STOP); */ -static DEFINE_PER_CPU(int, cpufreq_policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); #define lock_policy_rwsem(mode, cpu) \ static int lock_policy_rwsem_##mode(int cpu) \ { \ - int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ - BUG_ON(policy_cpu == -1); \ - down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); \ + BUG_ON(!policy); \ + down_##mode(&per_cpu(cpu_policy_rwsem, policy->cpu)); \ \ 
return 0; \ } @@ -88,14 +82,20 @@ lock_policy_rwsem(write, cpu); #define unlock_policy_rwsem(mode, cpu) \ static void unlock_policy_rwsem_##mode(int cpu) \ { \ - int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ - BUG_ON(policy_cpu == -1); \ - up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); \ + BUG_ON(!policy); \ + up_##mode(&per_cpu(cpu_policy_rwsem, policy->cpu)); \ } unlock_policy_rwsem(read, cpu); unlock_policy_rwsem(write, cpu); +/* + * rwsem to guarantee that cpufreq driver module doesn't unload during critical + * sections + */ +static DECLARE_RWSEM(cpufreq_rwsem); + /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); @@ -183,78 +183,46 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy) } EXPORT_SYMBOL_GPL(get_cpu_idle_time); -static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs) +struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { - struct cpufreq_policy *data; + struct cpufreq_policy *policy = NULL; unsigned long flags; - if (cpu >= nr_cpu_ids) - goto err_out; + if (cpufreq_disabled() || (cpu >= nr_cpu_ids)) + return NULL; + + if (!down_read_trylock(&cpufreq_rwsem)) + return NULL; /* get the cpufreq driver */ read_lock_irqsave(&cpufreq_driver_lock, flags); - if (!cpufreq_driver) - goto err_out_unlock; - - if (!try_module_get(cpufreq_driver->owner)) - goto err_out_unlock; - - /* get the CPU */ - data = per_cpu(cpufreq_cpu_data, cpu); - - if (!data) - goto err_out_put_module; - - if (!sysfs && !kobject_get(&data->kobj)) - goto err_out_put_module; + if (cpufreq_driver) { + /* get the CPU */ + policy = per_cpu(cpufreq_cpu_data, cpu); + if (policy) + kobject_get(&policy->kobj); + } read_unlock_irqrestore(&cpufreq_driver_lock, flags); - return data; -err_out_put_module: - module_put(cpufreq_driver->owner); -err_out_unlock: - read_unlock_irqrestore(&cpufreq_driver_lock, flags); -err_out: - 
return NULL; -} - -struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) -{ - if (cpufreq_disabled()) - return NULL; + if (!policy) + up_read(&cpufreq_rwsem); - return __cpufreq_cpu_get(cpu, false); + return policy; } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); -static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu) -{ - return __cpufreq_cpu_get(cpu, true); -} - -static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs) -{ - if (!sysfs) - kobject_put(&data->kobj); - module_put(cpufreq_driver->owner); -} - -void cpufreq_cpu_put(struct cpufreq_policy *data) +void cpufreq_cpu_put(struct cpufreq_policy *policy) { if (cpufreq_disabled()) return; - __cpufreq_cpu_put(data, false); + kobject_put(&policy->kobj); + up_read(&cpufreq_rwsem); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); -static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data) -{ - __cpufreq_cpu_put(data, true); -} - /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -459,8 +427,8 @@ show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); show_one(scaling_cur_freq, cur); -static int __cpufreq_set_policy(struct cpufreq_policy *data, - struct cpufreq_policy *policy); +static int __cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy); /** * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access @@ -699,12 +667,12 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; - policy = cpufreq_cpu_get_sysfs(policy->cpu); - if (!policy) - goto no_policy; + + if (!down_read_trylock(&cpufreq_rwsem)) + goto exit; if (lock_policy_rwsem_read(policy->cpu) < 0) - goto fail; + goto up_read; if (fattr->show) ret = fattr->show(policy, buf); @@ -712,9 +680,10 @@ static ssize_t 
show(struct kobject *kobj, struct attribute *attr, char *buf) ret = -EIO; unlock_policy_rwsem_read(policy->cpu); -fail: - cpufreq_cpu_put_sysfs(policy); -no_policy: + +up_read: + up_read(&cpufreq_rwsem); +exit: return ret; } @@ -724,12 +693,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; - policy = cpufreq_cpu_get_sysfs(policy->cpu); - if (!policy) - goto no_policy; + + if (!down_read_trylock(&cpufreq_rwsem)) + goto exit; if (lock_policy_rwsem_write(policy->cpu) < 0) - goto fail; + goto up_read; if (fattr->store) ret = fattr->store(policy, buf, count); @@ -737,9 +706,10 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, ret = -EIO; unlock_policy_rwsem_write(policy->cpu); -fail: - cpufreq_cpu_put_sysfs(policy); -no_policy: + +up_read: + up_read(&cpufreq_rwsem); +exit: return ret; } @@ -805,41 +775,32 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr) EXPORT_SYMBOL(cpufreq_sysfs_remove_file); /* symlink affected CPUs */ -static int cpufreq_add_dev_symlink(unsigned int cpu, - struct cpufreq_policy *policy) +static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) { unsigned int j; int ret = 0; for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; struct device *cpu_dev; - if (j == cpu) + if (j == policy->cpu) continue; - pr_debug("CPU %u already managed, adding link\n", j); - managed_policy = cpufreq_cpu_get(cpu); + pr_debug("Adding link for CPU: %u\n", j); cpu_dev = get_cpu_device(j); ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq"); - if (ret) { - cpufreq_cpu_put(managed_policy); - return ret; - } + if (ret) + break; } return ret; } -static int cpufreq_add_dev_interface(unsigned int cpu, - struct cpufreq_policy *policy, +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, struct device *dev) { - struct cpufreq_policy new_policy; struct 
freq_attr **drv_attr; - unsigned long flags; int ret = 0; - unsigned int j; /* prepare interface data */ ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, @@ -871,18 +832,24 @@ static int cpufreq_add_dev_interface(unsigned int cpu, goto err_out_kobj_put; } - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->cpus) { - per_cpu(cpufreq_cpu_data, j) = policy; - per_cpu(cpufreq_policy_cpu, j) = policy->cpu; - } - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - - ret = cpufreq_add_dev_symlink(cpu, policy); + ret = cpufreq_add_dev_symlink(policy); if (ret) goto err_out_kobj_put; - memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + return ret; + +err_out_kobj_put: + kobject_put(&policy->kobj); + wait_for_completion(&policy->kobj_unregister); + return ret; +} + +static void cpufreq_init_policy(struct cpufreq_policy *policy) +{ + struct cpufreq_policy new_policy; + int ret = 0; + + memcpy(&new_policy, policy, sizeof(*policy)); /* assure that the starting sequence is run in __cpufreq_set_policy */ policy->governor = NULL; @@ -896,72 +863,106 @@ static int cpufreq_add_dev_interface(unsigned int cpu, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); } - return ret; - -err_out_kobj_put: - kobject_put(&policy->kobj); - wait_for_completion(&policy->kobj_unregister); - return ret; } #ifdef CONFIG_HOTPLUG_CPU -static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling, - struct device *dev) +static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, + unsigned int cpu, struct device *dev, + bool frozen) { - struct cpufreq_policy *policy; int ret = 0, has_target = !!cpufreq_driver->target; unsigned long flags; - policy = cpufreq_cpu_get(sibling); - WARN_ON(!policy); - - if (has_target) - __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + if (has_target) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + if (ret) { + pr_err("%s: Failed to stop governor\n", __func__); + return ret; + } + } - 
lock_policy_rwsem_write(sibling); + lock_policy_rwsem_write(policy->cpu); write_lock_irqsave(&cpufreq_driver_lock, flags); cpumask_set_cpu(cpu, policy->cpus); - per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu; per_cpu(cpufreq_cpu_data, cpu) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - unlock_policy_rwsem_write(sibling); + unlock_policy_rwsem_write(policy->cpu); if (has_target) { - __cpufreq_governor(policy, CPUFREQ_GOV_START); - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) || + (ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) { + pr_err("%s: Failed to start governor\n", __func__); + return ret; + } } - ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); - if (ret) { - cpufreq_cpu_put(policy); - return ret; - } + /* Don't touch sysfs links during light-weight init */ + if (!frozen) + ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); - return 0; + return ret; } #endif -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. - * - * The Oracle says: try running cpufreq registration/unregistration concurrently - * with with cpu hotplugging and all hell will break loose. Tried to clean this - * mess up, but more thorough testing is needed. 
- Mathieu - */ -static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) +{ + struct cpufreq_policy *policy; + unsigned long flags; + + write_lock_irqsave(&cpufreq_driver_lock, flags); + + policy = per_cpu(cpufreq_cpu_data_fallback, cpu); + + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + return policy; +} + +static struct cpufreq_policy *cpufreq_policy_alloc(void) +{ + struct cpufreq_policy *policy; + + policy = kzalloc(sizeof(*policy), GFP_KERNEL); + if (!policy) + return NULL; + + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) + goto err_free_policy; + + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) + goto err_free_cpumask; + + INIT_LIST_HEAD(&policy->policy_list); + return policy; + +err_free_cpumask: + free_cpumask_var(policy->cpus); +err_free_policy: + kfree(policy); + + return NULL; +} + +static void cpufreq_policy_free(struct cpufreq_policy *policy) +{ + free_cpumask_var(policy->related_cpus); + free_cpumask_var(policy->cpus); + kfree(policy); +} + +static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, + bool frozen) { unsigned int j, cpu = dev->id; int ret = -ENOMEM; struct cpufreq_policy *policy; unsigned long flags; #ifdef CONFIG_HOTPLUG_CPU + struct cpufreq_policy *tpolicy; struct cpufreq_governor *gov; - int sibling; #endif if (cpu_is_offline(cpu)) @@ -977,43 +978,38 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) cpufreq_cpu_put(policy); return 0; } +#endif + + if (!down_read_trylock(&cpufreq_rwsem)) + return 0; #ifdef CONFIG_HOTPLUG_CPU /* Check if this cpu was hot-unplugged earlier and has siblings */ read_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_online_cpu(sibling) { - struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling); - if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) { + list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) { + if 
(cpumask_test_cpu(cpu, tpolicy->related_cpus)) { read_unlock_irqrestore(&cpufreq_driver_lock, flags); - return cpufreq_add_policy_cpu(cpu, sibling, dev); + ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen); + up_read(&cpufreq_rwsem); + return ret; } } read_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif -#endif - if (!try_module_get(cpufreq_driver->owner)) { - ret = -EINVAL; - goto module_out; - } + if (frozen) + /* Restore the saved policy when doing light-weight init */ + policy = cpufreq_policy_restore(cpu); + else + policy = cpufreq_policy_alloc(); - policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); if (!policy) goto nomem_out; - if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) - goto err_free_policy; - - if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) - goto err_free_cpumask; - policy->cpu = cpu; policy->governor = CPUFREQ_DEFAULT_GOVERNOR; cpumask_copy(policy->cpus, cpumask_of(cpu)); - /* Initially set CPU itself as the policy_cpu */ - per_cpu(cpufreq_policy_cpu, cpu) = cpu; - init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); @@ -1050,12 +1046,26 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) } #endif - ret = cpufreq_add_dev_interface(cpu, policy, dev); - if (ret) - goto err_out_unregister; + write_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_cpu(j, policy->cpus) + per_cpu(cpufreq_cpu_data, j) = policy; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + if (!frozen) { + ret = cpufreq_add_dev_interface(policy, dev); + if (ret) + goto err_out_unregister; + } + + write_lock_irqsave(&cpufreq_driver_lock, flags); + list_add(&policy->policy_list, &cpufreq_policy_list); + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + cpufreq_init_policy(policy); kobject_uevent(&policy->kobj, KOBJ_ADD); - module_put(cpufreq_driver->owner); + up_read(&cpufreq_rwsem); + pr_debug("initialization complete\n"); return 0; @@ -1066,32 +1076,33 @@ 
err_out_unregister: per_cpu(cpufreq_cpu_data, j) = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - kobject_put(&policy->kobj); - wait_for_completion(&policy->kobj_unregister); - err_set_policy_cpu: - per_cpu(cpufreq_policy_cpu, cpu) = -1; - free_cpumask_var(policy->related_cpus); -err_free_cpumask: - free_cpumask_var(policy->cpus); -err_free_policy: - kfree(policy); + cpufreq_policy_free(policy); nomem_out: - module_put(cpufreq_driver->owner); -module_out: + up_read(&cpufreq_rwsem); + return ret; } -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +/** + * cpufreq_add_dev - add a CPU device + * + * Adds the cpufreq interface for a CPU device. + * + * The Oracle says: try running cpufreq registration/unregistration concurrently + * with with cpu hotplugging and all hell will break loose. Tried to clean this + * mess up, but more thorough testing is needed. - Mathieu + */ +static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { - int j; + return __cpufreq_add_dev(dev, sif, false); +} +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +{ policy->last_cpu = policy->cpu; policy->cpu = cpu; - for_each_cpu(j, policy->cpus) - per_cpu(cpufreq_policy_cpu, j) = cpu; - #ifdef CONFIG_CPU_FREQ_TABLE cpufreq_frequency_table_update_policy_cpu(policy); #endif @@ -1099,6 +1110,37 @@ static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) CPUFREQ_UPDATE_POLICY_CPU, policy); } +static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, + unsigned int old_cpu, bool frozen) +{ + struct device *cpu_dev; + int ret; + + /* first sibling now owns the new sysfs dir */ + cpu_dev = get_cpu_device(cpumask_first(policy->cpus)); + + /* Don't touch sysfs files during light-weight tear-down */ + if (frozen) + return cpu_dev->id; + + sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); + ret = kobject_move(&policy->kobj, &cpu_dev->kobj); + if (ret) { + pr_err("%s: Failed to 
move kobj: %d", __func__, ret); + + WARN_ON(lock_policy_rwsem_write(old_cpu)); + cpumask_set_cpu(old_cpu, policy->cpus); + unlock_policy_rwsem_write(old_cpu); + + ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj, + "cpufreq"); + + return -EINVAL; + } + + return cpu_dev->id; +} + /** * __cpufreq_remove_dev - remove a CPU device * @@ -1107,111 +1149,126 @@ static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) * This routine frees the rwsem before returning. */ static int __cpufreq_remove_dev(struct device *dev, - struct subsys_interface *sif) + struct subsys_interface *sif, bool frozen) { - unsigned int cpu = dev->id, ret, cpus; + unsigned int cpu = dev->id, cpus; + int new_cpu, ret; unsigned long flags; - struct cpufreq_policy *data; + struct cpufreq_policy *policy; struct kobject *kobj; struct completion *cmp; - struct device *cpu_dev; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); write_lock_irqsave(&cpufreq_driver_lock, flags); - data = per_cpu(cpufreq_cpu_data, cpu); - per_cpu(cpufreq_cpu_data, cpu) = NULL; + policy = per_cpu(cpufreq_cpu_data, cpu); + + /* Save the policy somewhere when doing a light-weight tear-down */ + if (frozen) + per_cpu(cpufreq_cpu_data_fallback, cpu) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!data) { + if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); return -EINVAL; } - if (cpufreq_driver->target) - __cpufreq_governor(data, CPUFREQ_GOV_STOP); + if (cpufreq_driver->target) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + if (ret) { + pr_err("%s: Failed to stop governor\n", __func__); + return ret; + } + } #ifdef CONFIG_HOTPLUG_CPU if (!cpufreq_driver->setpolicy) strncpy(per_cpu(cpufreq_cpu_governor, cpu), - data->governor->name, CPUFREQ_NAME_LEN); + policy->governor->name, CPUFREQ_NAME_LEN); #endif WARN_ON(lock_policy_rwsem_write(cpu)); - cpus = cpumask_weight(data->cpus); + cpus = cpumask_weight(policy->cpus); if (cpus > 1) - cpumask_clear_cpu(cpu, 
data->cpus); + cpumask_clear_cpu(cpu, policy->cpus); unlock_policy_rwsem_write(cpu); - if (cpu != data->cpu) { + if (cpu != policy->cpu && !frozen) { sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { - /* first sibling now owns the new sysfs dir */ - cpu_dev = get_cpu_device(cpumask_first(data->cpus)); - sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); - ret = kobject_move(&data->kobj, &cpu_dev->kobj); - if (ret) { - pr_err("%s: Failed to move kobj: %d", __func__, ret); + new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen); + if (new_cpu >= 0) { WARN_ON(lock_policy_rwsem_write(cpu)); - cpumask_set_cpu(cpu, data->cpus); - - write_lock_irqsave(&cpufreq_driver_lock, flags); - per_cpu(cpufreq_cpu_data, cpu) = data; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - + update_policy_cpu(policy, new_cpu); unlock_policy_rwsem_write(cpu); - ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj, - "cpufreq"); - return -EINVAL; + if (!frozen) { + pr_debug("%s: policy Kobject moved to cpu: %d " + "from: %d\n",__func__, new_cpu, cpu); + } } - - WARN_ON(lock_policy_rwsem_write(cpu)); - update_policy_cpu(data, cpu_dev->id); - unlock_policy_rwsem_write(cpu); - pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", - __func__, cpu_dev->id, cpu); } /* If cpu is last user of policy, free policy */ if (cpus == 1) { - if (cpufreq_driver->target) - __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); - - lock_policy_rwsem_read(cpu); - kobj = &data->kobj; - cmp = &data->kobj_unregister; - unlock_policy_rwsem_read(cpu); - kobject_put(kobj); - - /* we need to make sure that the underlying kobj is actually - * not referenced anymore by anybody before we proceed with - * unloading. 
- */ - pr_debug("waiting for dropping of refcount\n"); - wait_for_completion(cmp); - pr_debug("wait complete\n"); + if (cpufreq_driver->target) { + ret = __cpufreq_governor(policy, + CPUFREQ_GOV_POLICY_EXIT); + if (ret) { + pr_err("%s: Failed to exit governor\n", + __func__); + return ret; + } + } + + if (!frozen) { + lock_policy_rwsem_read(cpu); + kobj = &policy->kobj; + cmp = &policy->kobj_unregister; + unlock_policy_rwsem_read(cpu); + kobject_put(kobj); + + /* + * We need to make sure that the underlying kobj is + * actually not referenced anymore by anybody before we + * proceed with unloading. + */ + pr_debug("waiting for dropping of refcount\n"); + wait_for_completion(cmp); + pr_debug("wait complete\n"); + } + /* + * Perform the ->exit() even during light-weight tear-down, + * since this is a core component, and is essential for the + * subsequent light-weight ->init() to succeed. + */ if (cpufreq_driver->exit) - cpufreq_driver->exit(data); + cpufreq_driver->exit(policy); - free_cpumask_var(data->related_cpus); - free_cpumask_var(data->cpus); - kfree(data); + /* Remove policy from list of active policies */ + write_lock_irqsave(&cpufreq_driver_lock, flags); + list_del(&policy->policy_list); + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + if (!frozen) + cpufreq_policy_free(policy); } else { - pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); - cpufreq_cpu_put(data); if (cpufreq_driver->target) { - __cpufreq_governor(data, CPUFREQ_GOV_START); - __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) || + (ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) { + pr_err("%s: Failed to start governor\n", + __func__); + return ret; + } } } - per_cpu(cpufreq_policy_cpu, cpu) = -1; + per_cpu(cpufreq_cpu_data, cpu) = NULL; return 0; } @@ -1223,7 +1280,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (cpu_is_offline(cpu)) return 0; - retval = 
__cpufreq_remove_dev(dev, sif); + retval = __cpufreq_remove_dev(dev, sif, false); return retval; } @@ -1344,10 +1401,9 @@ static unsigned int __cpufreq_get(unsigned int cpu) unsigned int cpufreq_get(unsigned int cpu) { unsigned int ret_freq = 0; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (!policy) - goto out; + if (!down_read_trylock(&cpufreq_rwsem)) + return 0; if (unlikely(lock_policy_rwsem_read(cpu))) goto out_policy; @@ -1357,8 +1413,8 @@ unsigned int cpufreq_get(unsigned int cpu) unlock_policy_rwsem_read(cpu); out_policy: - cpufreq_cpu_put(policy); -out: + up_read(&cpufreq_rwsem); + return ret_freq; } EXPORT_SYMBOL(cpufreq_get); @@ -1381,23 +1437,23 @@ static int cpufreq_bp_suspend(void) int ret = 0; int cpu = smp_processor_id(); - struct cpufreq_policy *cpu_policy; + struct cpufreq_policy *policy; pr_debug("suspending cpu %u\n", cpu); /* If there's no policy for the boot CPU, we have nothing to do. */ - cpu_policy = cpufreq_cpu_get(cpu); - if (!cpu_policy) + policy = cpufreq_cpu_get(cpu); + if (!policy) return 0; if (cpufreq_driver->suspend) { - ret = cpufreq_driver->suspend(cpu_policy); + ret = cpufreq_driver->suspend(policy); if (ret) printk(KERN_ERR "cpufreq: suspend failed in ->suspend " - "step on CPU %u\n", cpu_policy->cpu); + "step on CPU %u\n", policy->cpu); } - cpufreq_cpu_put(cpu_policy); + cpufreq_cpu_put(policy); return ret; } @@ -1419,28 +1475,28 @@ static void cpufreq_bp_resume(void) int ret = 0; int cpu = smp_processor_id(); - struct cpufreq_policy *cpu_policy; + struct cpufreq_policy *policy; pr_debug("resuming cpu %u\n", cpu); /* If there's no policy for the boot CPU, we have nothing to do. 
*/ - cpu_policy = cpufreq_cpu_get(cpu); - if (!cpu_policy) + policy = cpufreq_cpu_get(cpu); + if (!policy) return; if (cpufreq_driver->resume) { - ret = cpufreq_driver->resume(cpu_policy); + ret = cpufreq_driver->resume(policy); if (ret) { printk(KERN_ERR "cpufreq: resume failed in ->resume " - "step on CPU %u\n", cpu_policy->cpu); + "step on CPU %u\n", policy->cpu); goto fail; } } - schedule_work(&cpu_policy->update); + schedule_work(&policy->update); fail: - cpufreq_cpu_put(cpu_policy); + cpufreq_cpu_put(policy); } static struct syscore_ops cpufreq_syscore_ops = { @@ -1594,18 +1650,6 @@ fail: } EXPORT_SYMBOL_GPL(cpufreq_driver_target); -int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) -{ - if (cpufreq_disabled()) - return 0; - - if (!cpufreq_driver->getavg) - return 0; - - return cpufreq_driver->getavg(policy, cpu); -} -EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg); - /* * when "event" is CPUFREQ_GOV_LIMITS */ @@ -1640,8 +1684,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, } } - if (!try_module_get(policy->governor->owner)) - return -EINVAL; + if (event == CPUFREQ_GOV_POLICY_INIT) + if (!try_module_get(policy->governor->owner)) + return -EINVAL; pr_debug("__cpufreq_governor for CPU %u, event %u\n", policy->cpu, event); @@ -1677,11 +1722,8 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, mutex_unlock(&cpufreq_governor_lock); } - /* we keep one module reference alive for - each CPU governed by this CPU */ - if ((event != CPUFREQ_GOV_START) || ret) - module_put(policy->governor->owner); - if ((event == CPUFREQ_GOV_STOP) && !ret) + if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || + ((event == CPUFREQ_GOV_POLICY_EXIT) && !ret)) module_put(policy->governor->owner); return ret; @@ -1761,7 +1803,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) if (!cpu_policy) return -EINVAL; - memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); + memcpy(policy, cpu_policy, sizeof(*policy)); 
cpufreq_cpu_put(cpu_policy); return 0; @@ -1772,95 +1814,94 @@ EXPORT_SYMBOL(cpufreq_get_policy); * data : current policy. * policy : policy to be set. */ -static int __cpufreq_set_policy(struct cpufreq_policy *data, - struct cpufreq_policy *policy) +static int __cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy) { int ret = 0, failed = 1; - pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, - policy->min, policy->max); + pr_debug("setting new policy for CPU %u: %u - %u kHz\n", new_policy->cpu, + new_policy->min, new_policy->max); - memcpy(&policy->cpuinfo, &data->cpuinfo, - sizeof(struct cpufreq_cpuinfo)); + memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - if (policy->min > data->max || policy->max < data->min) { + if (new_policy->min > policy->max || new_policy->max < policy->min) { ret = -EINVAL; goto error_out; } /* verify the cpu speed can be set within this limit */ - ret = cpufreq_driver->verify(policy); + ret = cpufreq_driver->verify(new_policy); if (ret) goto error_out; /* adjust if necessary - all reasons */ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_ADJUST, policy); + CPUFREQ_ADJUST, new_policy); /* adjust if necessary - hardware incompatibility*/ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_INCOMPATIBLE, policy); + CPUFREQ_INCOMPATIBLE, new_policy); /* * verify the cpu speed can be set within this limit, which might be * different to the first one */ - ret = cpufreq_driver->verify(policy); + ret = cpufreq_driver->verify(new_policy); if (ret) goto error_out; /* notification of the new policy */ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_NOTIFY, policy); + CPUFREQ_NOTIFY, new_policy); - data->min = policy->min; - data->max = policy->max; + policy->min = new_policy->min; + policy->max = new_policy->max; pr_debug("new min and max freqs are %u - %u kHz\n", - data->min, data->max); + policy->min, 
policy->max); if (cpufreq_driver->setpolicy) { - data->policy = policy->policy; + policy->policy = new_policy->policy; pr_debug("setting range\n"); - ret = cpufreq_driver->setpolicy(policy); + ret = cpufreq_driver->setpolicy(new_policy); } else { - if (policy->governor != data->governor) { + if (new_policy->governor != policy->governor) { /* save old, working values */ - struct cpufreq_governor *old_gov = data->governor; + struct cpufreq_governor *old_gov = policy->governor; pr_debug("governor switch\n"); /* end old governor */ - if (data->governor) { - __cpufreq_governor(data, CPUFREQ_GOV_STOP); - unlock_policy_rwsem_write(policy->cpu); - __cpufreq_governor(data, + if (policy->governor) { + __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + unlock_policy_rwsem_write(new_policy->cpu); + __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - lock_policy_rwsem_write(policy->cpu); + lock_policy_rwsem_write(new_policy->cpu); } /* start new governor */ - data->governor = policy->governor; - if (!__cpufreq_governor(data, CPUFREQ_GOV_POLICY_INIT)) { - if (!__cpufreq_governor(data, CPUFREQ_GOV_START)) { + policy->governor = new_policy->governor; + if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) { + if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) { failed = 0; } else { - unlock_policy_rwsem_write(policy->cpu); - __cpufreq_governor(data, + unlock_policy_rwsem_write(new_policy->cpu); + __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - lock_policy_rwsem_write(policy->cpu); + lock_policy_rwsem_write(new_policy->cpu); } } if (failed) { /* new governor failed, so re-start old one */ pr_debug("starting governor %s failed\n", - data->governor->name); + policy->governor->name); if (old_gov) { - data->governor = old_gov; - __cpufreq_governor(data, + policy->governor = old_gov; + __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); - __cpufreq_governor(data, + __cpufreq_governor(policy, CPUFREQ_GOV_START); } ret = -EINVAL; @@ -1869,7 +1910,7 @@ static int 
__cpufreq_set_policy(struct cpufreq_policy *data, /* might be a policy change, too, so fall through */ } pr_debug("governor: change or update limits\n"); - __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); } error_out: @@ -1885,11 +1926,11 @@ error_out: */ int cpufreq_update_policy(unsigned int cpu) { - struct cpufreq_policy *data = cpufreq_cpu_get(cpu); - struct cpufreq_policy policy; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy new_policy; int ret; - if (!data) { + if (!policy) { ret = -ENODEV; goto no_policy; } @@ -1900,34 +1941,34 @@ int cpufreq_update_policy(unsigned int cpu) } pr_debug("updating policy for CPU %u\n", cpu); - memcpy(&policy, data, sizeof(struct cpufreq_policy)); - policy.min = data->user_policy.min; - policy.max = data->user_policy.max; - policy.policy = data->user_policy.policy; - policy.governor = data->user_policy.governor; + memcpy(&new_policy, policy, sizeof(*policy)); + new_policy.min = policy->user_policy.min; + new_policy.max = policy->user_policy.max; + new_policy.policy = policy->user_policy.policy; + new_policy.governor = policy->user_policy.governor; /* * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change */ if (cpufreq_driver->get) { - policy.cur = cpufreq_driver->get(cpu); - if (!data->cur) { + new_policy.cur = cpufreq_driver->get(cpu); + if (!policy->cur) { pr_debug("Driver did not initialize current freq"); - data->cur = policy.cur; + policy->cur = new_policy.cur; } else { - if (data->cur != policy.cur && cpufreq_driver->target) - cpufreq_out_of_sync(cpu, data->cur, - policy.cur); + if (policy->cur != new_policy.cur && cpufreq_driver->target) + cpufreq_out_of_sync(cpu, policy->cur, + new_policy.cur); } } - ret = __cpufreq_set_policy(data, &policy); + ret = __cpufreq_set_policy(policy, &new_policy); unlock_policy_rwsem_write(cpu); fail: - cpufreq_cpu_put(data); + cpufreq_cpu_put(policy); 
no_policy: return ret; } @@ -1938,21 +1979,26 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, { unsigned int cpu = (unsigned long)hcpu; struct device *dev; + bool frozen = false; dev = get_cpu_device(cpu); if (dev) { - switch (action) { + + if (action & CPU_TASKS_FROZEN) + frozen = true; + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpufreq_add_dev(dev, NULL); + __cpufreq_add_dev(dev, NULL, frozen); + cpufreq_update_policy(cpu); break; + case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - __cpufreq_remove_dev(dev, NULL); + __cpufreq_remove_dev(dev, NULL, frozen); break; + case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - cpufreq_add_dev(dev, NULL); + __cpufreq_add_dev(dev, NULL, frozen); break; } } @@ -2059,9 +2105,13 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) subsys_interface_unregister(&cpufreq_interface); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); + down_write(&cpufreq_rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); + cpufreq_driver = NULL; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + up_write(&cpufreq_rwsem); return 0; } @@ -2074,10 +2124,8 @@ static int __init cpufreq_core_init(void) if (cpufreq_disabled()) return -ENODEV; - for_each_possible_cpu(cpu) { - per_cpu(cpufreq_policy_cpu, cpu) = -1; + for_each_possible_cpu(cpu) init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); - } cpufreq_global_kobject = kobject_create(); BUG_ON(!cpufreq_global_kobject); |