-rw-r--r--  drivers/cpufreq/acpi-cpufreq.c             |  4
-rw-r--r--  drivers/cpufreq/cpufreq.c                  |  6
-rw-r--r--  drivers/cpufreq/cpufreq_ondemand.c         |  4
-rw-r--r--  drivers/cpufreq/intel_pstate.c             | 43
-rw-r--r--  drivers/cpufreq/powernow-k8.c              |  6
-rw-r--r--  drivers/cpufreq/powernv-cpufreq.c          |  4
-rw-r--r--  drivers/thermal/intel/therm_throt.c        |  7
-rw-r--r--  drivers/thermal/intel/thermal_interrupt.h  |  3
-rw-r--r--  include/linux/energy_model.h               | 16
-rw-r--r--  include/linux/notifier.h                   |  2
-rw-r--r--  kernel/cpu_pm.c                            | 50
-rw-r--r--  kernel/notifier.c                          | 19
-rw-r--r--  kernel/power/energy_model.c                |  4
-rw-r--r--  kernel/sched/cpufreq_schedutil.c           | 16
14 files changed, 130 insertions(+), 54 deletions(-)
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 7e7450453714..b49612895c78 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -163,9 +163,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
 	if (ret || val > 1)
 		return -EINVAL;
 
-	get_online_cpus();
+	cpus_read_lock();
 	set_boost(policy, val);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return count;
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 45f3416988f1..06c526d66dd3 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2654,18 +2654,18 @@ int cpufreq_boost_trigger_state(int state)
 	cpufreq_driver->boost_enabled = state;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_active_policy(policy) {
 		ret = cpufreq_driver->set_boost(policy, state);
 		if (ret)
 			goto err_reset_state;
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return 0;
 
 err_reset_state:
-	put_online_cpus();
+	cpus_read_unlock();
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	cpufreq_driver->boost_enabled = !state;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index ac361a8b1d3b..eb4320b619c9 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -418,7 +418,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
 	default_powersave_bias = powersave_bias;
 	cpumask_clear(&done);
 
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct cpufreq_policy *policy;
 		struct policy_dbs_info *policy_dbs;
@@ -442,7 +442,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
 		od_tuners = dbs_data->tuners;
 		od_tuners->powersave_bias = default_powersave_bias;
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 void od_register_powersave_bias_handler(unsigned int (*f)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bb4549959b11..b4ffe6c8a0d0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,6 +32,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
 
 #define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
 
@@ -219,6 +220,7 @@ struct global_params {
  * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  * @suspended:		Whether or not the driver has been suspended.
+ * @hwp_notify_work:	workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -257,6 +259,7 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+	struct delayed_work hwp_notify_work;
 };
 
 static struct cpudata **all_cpu_data;
@@ -1625,6 +1628,40 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
 
 /************************** sysfs end ************************/
 
+static void intel_pstate_notify_work(struct work_struct *work)
+{
+	mutex_lock(&intel_pstate_driver_lock);
+	cpufreq_update_policy(smp_processor_id());
+	wrmsrl(MSR_HWP_STATUS, 0);
+	mutex_unlock(&intel_pstate_driver_lock);
+}
+
+void notify_hwp_interrupt(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpudata *cpudata;
+	u64 value;
+
+	if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	rdmsrl(MSR_HWP_STATUS, value);
+	if (!(value & 0x01))
+		return;
+
+	cpudata = all_cpu_data[this_cpu];
+	schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
+}
+
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+	/* Enable HWP notification interrupt for guaranteed performance change */
+	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+	}
+}
+
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
 	/* First disable HWP notification interrupt as we don't process them */
@@ -1634,6 +1671,8 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 	if (cpudata->epp_default == -EINVAL)
 		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+	intel_pstate_enable_hwp_interrupt(cpudata);
 }
 
 static int atom_get_min_pstate(void)
@@ -2969,7 +3008,7 @@ static void intel_pstate_driver_cleanup(void)
 {
 	unsigned int cpu;
 
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		if (all_cpu_data[cpu]) {
 			if (intel_pstate_driver == &intel_pstate)
@@ -2979,7 +3018,7 @@ static void intel_pstate_driver_cleanup(void)
 			all_cpu_data[cpu] = NULL;
 		}
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	intel_pstate_driver = NULL;
 }
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index b9ccb6a3dad9..12ab4014af71 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1180,7 +1180,7 @@ static int powernowk8_init(void)
 	if (!x86_match_cpu(powernow_k8_ids))
 		return -ENODEV;
 
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(i) {
 		smp_call_function_single(i, check_supported_cpu, &ret, 1);
 		if (!ret)
@@ -1188,10 +1188,10 @@ static int powernowk8_init(void)
 	}
 
 	if (supported_cpus != num_online_cpus()) {
-		put_online_cpus();
+		cpus_read_unlock();
 		return -ENODEV;
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	ret = cpufreq_register_driver(&cpufreq_amd64_driver);
 	if (ret)
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 005600cef273..23a06cba392c 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -918,7 +918,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
 	unsigned int cpu;
 	cpumask_t mask;
 
-	get_online_cpus();
+	cpus_read_lock();
 	cpumask_and(&mask, &chip->mask, cpu_online_mask);
 	smp_call_function_any(&mask,
 			      powernv_cpufreq_throttle_check, NULL, 0);
@@ -939,7 +939,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
 		cpufreq_cpu_put(policy);
 	}
 out:
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index 99abdc03c44c..dab7e8fb1059 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -569,13 +569,18 @@ static void notify_thresholds(__u64 msr_val)
 		platform_thermal_notify(msr_val);
 }
 
+void __weak notify_hwp_interrupt(void)
+{
+	wrmsrl_safe(MSR_HWP_STATUS, 0);
+}
+
 /* Thermal transition interrupt handler */
 void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
 
 	if (static_cpu_has(X86_FEATURE_HWP))
-		wrmsrl_safe(MSR_HWP_STATUS, 0);
+		notify_hwp_interrupt();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h
index 53f427bb58dc..01e7bed2ffc7 100644
--- a/drivers/thermal/intel/thermal_interrupt.h
+++ b/drivers/thermal/intel/thermal_interrupt.h
@@ -12,4 +12,7 @@ extern int (*platform_thermal_notify)(__u64 msr_val);
  * callback has rate control */
 extern bool (*platform_thermal_package_rate_control)(void);
 
+/* Handle HWP interrupt */
+extern void notify_hwp_interrupt(void);
+
 #endif /* _INTEL_THERMAL_INTERRUPT_H */
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 3f221dbf5f95..1834752c5617 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -53,6 +53,22 @@ struct em_perf_domain {
 #ifdef CONFIG_ENERGY_MODEL
 #define EM_MAX_POWER 0xFFFF
 
+/*
+ * Increase resolution of energy estimation calculations for 64-bit
+ * architectures. The extra resolution improves decision made by EAS for the
+ * task placement when two Performance Domains might provide similar energy
+ * estimation values (w/o better resolution the values could be equal).
+ *
+ * We increase resolution only if we have enough bits to allow this increased
+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
+ * are pretty high and the returns do not justify the increased costs.
+ */
+#ifdef CONFIG_64BIT
+#define em_scale_power(p) ((p) * 1000)
+#else
+#define em_scale_power(p) (p)
+#endif
+
 struct em_data_callback {
 	/**
 	 * active_power() - Provide power at the next performance state of
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 2fb373a5c1ed..87069b8459af 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -168,8 +168,6 @@ extern int raw_notifier_call_chain(struct raw_notifier_head *nh,
 extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 		unsigned long val, void *v);
 
-extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
-		unsigned long val_up, unsigned long val_down, void *v);
 extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh,
 		unsigned long val_up, unsigned long val_down, void *v);
 extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index f7e1d0eccdbc..246efc74e3f3 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -13,19 +13,32 @@
 #include <linux/spinlock.h>
 #include <linux/syscore_ops.h>
 
-static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+/*
+ * atomic_notifiers use a spinlock_t, which can block under PREEMPT_RT.
+ * Notifications for cpu_pm will be issued by the idle task itself, which can
+ * never block, IOW it requires using a raw_spinlock_t.
+ */
+static struct {
+	struct raw_notifier_head chain;
+	raw_spinlock_t lock;
+} cpu_pm_notifier = {
+	.chain = RAW_NOTIFIER_INIT(cpu_pm_notifier.chain),
+	.lock  = __RAW_SPIN_LOCK_UNLOCKED(cpu_pm_notifier.lock),
+};
 
 static int cpu_pm_notify(enum cpu_pm_event event)
 {
 	int ret;
 
 	/*
-	 * atomic_notifier_call_chain has a RCU read critical section, which
-	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
-	 * RCU know this.
+	 * This introduces a RCU read critical section, which could be
+	 * disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
+	 * this.
 	 */
 	rcu_irq_enter_irqson();
-	ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);
+	rcu_read_lock();
+	ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
+	rcu_read_unlock();
 	rcu_irq_exit_irqson();
 
 	return notifier_to_errno(ret);
@@ -33,10 +46,13 @@ static int cpu_pm_notify(enum cpu_pm_event event)
 
 static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down)
 {
+	unsigned long flags;
 	int ret;
 
 	rcu_irq_enter_irqson();
-	ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);
+	raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+	ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
+	raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
 	rcu_irq_exit_irqson();
 
 	return notifier_to_errno(ret);
@@ -49,12 +65,17 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
  * Add a driver to a list of drivers that are notified about
  * CPU and CPU cluster low power entry and exit.
  *
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_register.
+ * This function has the same return conditions as raw_notifier_chain_register.
  */
 int cpu_pm_register_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+	ret = raw_notifier_chain_register(&cpu_pm_notifier.chain, nb);
+	raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
 
@@ -64,12 +85,17 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
  *
 * Remove a driver from the CPU PM notifier list.
  *
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_unregister.
+ * This function has the same return conditions as raw_notifier_chain_unregister.
  */
 int cpu_pm_unregister_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+	ret = raw_notifier_chain_unregister(&cpu_pm_notifier.chain, nb);
+	raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 1b019cbca594..b8251dc0bc0f 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -172,25 +172,6 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
 }
 EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
 
-int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
-		unsigned long val_up, unsigned long val_down, void *v)
-{
-	unsigned long flags;
-	int ret;
-
-	/*
-	 * Musn't use RCU; because then the notifier list can
-	 * change between the up and down traversal.
-	 */
-	spin_lock_irqsave(&nh->lock, flags);
-	ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
-	spin_unlock_irqrestore(&nh->lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust);
-NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust);
-
 /**
  * atomic_notifier_call_chain - Call functions in an atomic notifier chain
  * @nh: Pointer to head of the atomic notifier chain
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 0f4530b3a8cd..a332ccd829e2 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -170,7 +170,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 	/* Compute the cost of each performance state. */
 	fmax = (u64) table[nr_states - 1].frequency;
 	for (i = 0; i < nr_states; i++) {
-		table[i].cost = div64_u64(fmax * table[i].power,
+		unsigned long power_res = em_scale_power(table[i].power);
+
+		table[i].cost = div64_u64(fmax * power_res,
 					  table[i].frequency);
 	}
 
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 57124614363d..e7af18857371 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -537,9 +537,17 @@ static struct attribute *sugov_attrs[] = {
 };
 ATTRIBUTE_GROUPS(sugov);
 
+static void sugov_tunables_free(struct kobject *kobj)
+{
+	struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+
+	kfree(to_sugov_tunables(attr_set));
+}
+
 static struct kobj_type sugov_tunables_ktype = {
 	.default_groups = sugov_groups,
 	.sysfs_ops = &governor_sysfs_ops,
+	.release = &sugov_tunables_free,
 };
 
 /********************** cpufreq governor interface *********************/
@@ -639,12 +647,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_polic
 	return tunables;
 }
 
-static void sugov_tunables_free(struct sugov_tunables *tunables)
+static void sugov_clear_global_tunables(void)
 {
 	if (!have_governor_per_policy())
 		global_tunables = NULL;
-
-	kfree(tunables);
 }
 
 static int sugov_init(struct cpufreq_policy *policy)
@@ -707,7 +713,7 @@ out:
 fail:
 	kobject_put(&tunables->attr_set.kobj);
 	policy->governor_data = NULL;
-	sugov_tunables_free(tunables);
+	sugov_clear_global_tunables();
 
 stop_kthread:
 	sugov_kthread_stop(sg_policy);
@@ -734,7 +740,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
 	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
 	policy->governor_data = NULL;
 	if (!count)
-		sugov_tunables_free(tunables);
+		sugov_clear_global_tunables();
 
 	mutex_unlock(&global_tunables_lock);
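The kernel/cpu_pm.c hunks above keep the cpu_pm_register_notifier()/cpu_pm_unregister_notifier() API unchanged while replacing the atomic notifier chain with a raw chain guarded by a raw_spinlock_t, so the chain can still be walked from the idle path under PREEMPT_RT. As a rough illustration of the consumer side (a minimal sketch, not part of this commit; my_pm_callback, my_pm_nb and my_pm_init are invented names), a client registers a non-sleeping callback that reacts to the CPU_PM_* events:

/* Hypothetical cpu_pm notifier client; not taken from this commit. */
#include <linux/cpu_pm.h>
#include <linux/init.h>
#include <linux/notifier.h>

/*
 * Invoked from the CPU idle / low-power entry path with interrupts
 * disabled, so it must not sleep or take sleeping locks.
 */
static int my_pm_callback(struct notifier_block *nb,
			  unsigned long action, void *data)
{
	switch (action) {
	case CPU_PM_ENTER:		/* about to lose per-CPU context */
		/* save per-CPU hardware state here */
		break;
	case CPU_PM_ENTER_FAILED:	/* low-power entry was aborted */
	case CPU_PM_EXIT:		/* back from low power */
		/* restore per-CPU hardware state here */
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_pm_nb = {
	.notifier_call = my_pm_callback,
};

static int __init my_pm_init(void)
{
	/* After this commit, registration no longer needs to sleep. */
	return cpu_pm_register_notifier(&my_pm_nb);
}
core_initcall(my_pm_init);

The callback constraints are the same as before the change: the plain notify path walks the chain under rcu_read_lock() and the robust path under the new raw spinlock, so callbacks always run in atomic context.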