summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c3
-rw-r--r--drivers/cpufreq/amd_freq_sensitivity.c3
-rw-r--r--drivers/cpufreq/cpufreq.c19
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c6
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c16
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c2
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c4
-rw-r--r--drivers/cpufreq/s5pv210-cpufreq.c2
-rw-r--r--drivers/powercap/dtpm.c78
-rw-r--r--drivers/powercap/dtpm_cpu.c228
-rw-r--r--include/linux/cpufreq.h165
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/dtpm.h26
-rw-r--r--include/linux/energy_model.h68
-rw-r--r--kernel/power/energy_model.c86
15 files changed, 473 insertions, 235 deletions
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 28467d83c745..3d514b82d055 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -470,7 +470,8 @@ static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
if (policy->cached_target_freq == target_freq)
index = policy->cached_resolved_idx;
else
- index = cpufreq_table_find_index_dl(policy, target_freq);
+ index = cpufreq_table_find_index_dl(policy, target_freq,
+ false);
entry = &policy->freq_table[index];
next_freq = entry->frequency;
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index d0b10baf039a..6448e03bcf48 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -91,7 +91,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int index;
index = cpufreq_table_find_index_h(policy,
- policy->cur - 1);
+ policy->cur - 1,
+ relation & CPUFREQ_RELATION_E);
freq_next = policy->freq_table[index].frequency;
}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5782b15a8caa..e338d2f010fe 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -554,7 +554,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
unsigned int target_freq)
{
- return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_L);
+ return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
}
EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
@@ -2260,8 +2260,16 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
!(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS))
return 0;
- if (cpufreq_driver->target)
+ if (cpufreq_driver->target) {
+ /*
+ * If the driver hasn't setup a single inefficient frequency,
+ * it's unlikely it knows how to decode CPUFREQ_RELATION_E.
+ */
+ if (!policy->efficiencies_available)
+ relation &= ~CPUFREQ_RELATION_E;
+
return cpufreq_driver->target(policy, target_freq, relation);
+ }
if (!cpufreq_driver->target_index)
return -EINVAL;
@@ -2523,8 +2531,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
if (ret)
return ret;
+ /*
+ * Resolve policy min/max to available frequencies. It ensures
+ * no frequency resolution will neither overshoot the requested maximum
+ * nor undershoot the requested minimum.
+ */
policy->min = new_data.min;
policy->max = new_data.max;
+ policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
+ policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
trace_cpu_frequency_limits(policy);
policy->cached_target_freq = UINT_MAX;
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index aa39ff31ec9f..0879ec3c170c 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -111,7 +111,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
if (requested_freq > policy->max)
requested_freq = policy->max;
- __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H);
+ __cpufreq_driver_target(policy, requested_freq,
+ CPUFREQ_RELATION_HE);
dbs_info->requested_freq = requested_freq;
goto out;
}
@@ -134,7 +135,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
else
requested_freq = policy->min;
- __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L);
+ __cpufreq_driver_target(policy, requested_freq,
+ CPUFREQ_RELATION_LE);
dbs_info->requested_freq = requested_freq;
}
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index eb4320b619c9..3b8f924771b4 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -83,9 +83,11 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
freq_avg = freq_req - freq_reduc;
/* Find freq bounds for freq_avg in freq_table */
- index = cpufreq_table_find_index_h(policy, freq_avg);
+ index = cpufreq_table_find_index_h(policy, freq_avg,
+ relation & CPUFREQ_RELATION_E);
freq_lo = freq_table[index].frequency;
- index = cpufreq_table_find_index_l(policy, freq_avg);
+ index = cpufreq_table_find_index_l(policy, freq_avg,
+ relation & CPUFREQ_RELATION_E);
freq_hi = freq_table[index].frequency;
/* Find out how long we have to be in hi and lo freqs */
@@ -118,12 +120,12 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
if (od_tuners->powersave_bias)
freq = od_ops.powersave_bias_target(policy, freq,
- CPUFREQ_RELATION_H);
+ CPUFREQ_RELATION_HE);
else if (policy->cur == policy->max)
return;
__cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ?
- CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
+ CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE);
}
/*
@@ -161,9 +163,9 @@ static void od_update(struct cpufreq_policy *policy)
if (od_tuners->powersave_bias)
freq_next = od_ops.powersave_bias_target(policy,
freq_next,
- CPUFREQ_RELATION_L);
+ CPUFREQ_RELATION_LE);
- __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
+ __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_CE);
}
}
@@ -182,7 +184,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy)
*/
if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
__cpufreq_driver_target(policy, dbs_info->freq_lo,
- CPUFREQ_RELATION_H);
+ CPUFREQ_RELATION_HE);
return dbs_info->freq_lo_delay_us;
}
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index 0cf18dd46b92..8ddbd0c5ce37 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -109,7 +109,7 @@ static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy,
struct mtk_cpufreq_data *data = policy->driver_data;
unsigned int index;
- index = cpufreq_table_find_index_dl(policy, target_freq);
+ index = cpufreq_table_find_index_dl(policy, target_freq, false);
writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]);
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 5a2cf5f91ccb..fddbd1ea1635 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -934,7 +934,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
policy = cpufreq_cpu_get(cpu);
if (!policy)
continue;
- index = cpufreq_table_find_index_c(policy, policy->cur);
+ index = cpufreq_table_find_index_c(policy, policy->cur, false);
powernv_cpufreq_target_index(policy, index);
cpumask_andnot(&mask, &mask, policy->cpus);
cpufreq_cpu_put(policy);
@@ -1022,7 +1022,7 @@ static unsigned int powernv_fast_switch(struct cpufreq_policy *policy,
int index;
struct powernv_smp_call_data freq_data;
- index = cpufreq_table_find_index_dl(policy, target_freq);
+ index = cpufreq_table_find_index_dl(policy, target_freq, false);
freq_data.pstate_id = powernv_freqs[index].driver_data;
freq_data.gpstate_id = powernv_freqs[index].driver_data;
set_pstate(&freq_data);
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index ad7d4f272ddc..76c888ed8d16 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -243,7 +243,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
new_freq = s5pv210_freq_table[index].frequency;
/* Finding current running level index */
- priv_index = cpufreq_table_find_index_h(policy, old_freq);
+ priv_index = cpufreq_table_find_index_h(policy, old_freq, false);
arm_volt = dvs_conf[index].arm_volt;
int_volt = dvs_conf[index].int_volt;
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index c2185ec5f887..b9fac786246a 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
parent->power_limit -= dtpm->power_limit;
parent = parent->parent;
}
-
- __dtpm_rebalance_weight(root);
}
static void __dtpm_add_power(struct dtpm *dtpm)
@@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
parent->power_limit += dtpm->power_limit;
parent = parent->parent;
}
+}
+
+static int __dtpm_update_power(struct dtpm *dtpm)
+{
+ int ret;
+
+ __dtpm_sub_power(dtpm);
+
+ ret = dtpm->ops->update_power_uw(dtpm);
+ if (ret)
+ pr_err("Failed to update power for '%s': %d\n",
+ dtpm->zone.name, ret);
- __dtpm_rebalance_weight(root);
+ if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
+ dtpm->power_limit = dtpm->power_max;
+
+ __dtpm_add_power(dtpm);
+
+ if (root)
+ __dtpm_rebalance_weight(root);
+
+ return ret;
}
/**
* dtpm_update_power - Update the power on the dtpm
* @dtpm: a pointer to a dtpm structure to update
- * @power_min: a u64 representing the new power_min value
- * @power_max: a u64 representing the new power_max value
*
* Function to update the power values of the dtpm node specified in
* parameter. These new values will be propagated to the tree.
*
* Return: zero on success, -EINVAL if the values are inconsistent
*/
-int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max)
+int dtpm_update_power(struct dtpm *dtpm)
{
- int ret = 0;
+ int ret;
mutex_lock(&dtpm_lock);
-
- if (power_min == dtpm->power_min && power_max == dtpm->power_max)
- goto unlock;
-
- if (power_max < power_min) {
- ret = -EINVAL;
- goto unlock;
- }
-
- __dtpm_sub_power(dtpm);
-
- dtpm->power_min = power_min;
- dtpm->power_max = power_max;
- if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
- dtpm->power_limit = power_max;
-
- __dtpm_add_power(dtpm);
-
-unlock:
+ ret = __dtpm_update_power(dtpm);
mutex_unlock(&dtpm_lock);
return ret;
@@ -359,24 +357,18 @@ static struct powercap_zone_ops zone_ops = {
};
/**
- * dtpm_alloc - Allocate and initialize a dtpm struct
- * @name: a string specifying the name of the node
- *
- * Return: a struct dtpm pointer, NULL in case of error
+ * dtpm_init - Allocate and initialize a dtpm struct
+ * @dtpm: The dtpm struct pointer to be initialized
+ * @ops: The dtpm device specific ops, NULL for a virtual node
*/
-struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops)
{
- struct dtpm *dtpm;
-
- dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
if (dtpm) {
INIT_LIST_HEAD(&dtpm->children);
INIT_LIST_HEAD(&dtpm->sibling);
dtpm->weight = 1024;
dtpm->ops = ops;
}
-
- return dtpm;
}
/**
@@ -436,6 +428,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
if (dtpm->ops && !(dtpm->ops->set_power_uw &&
dtpm->ops->get_power_uw &&
+ dtpm->ops->update_power_uw &&
dtpm->ops->release))
return -EINVAL;
@@ -455,7 +448,10 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
root = dtpm;
}
- __dtpm_add_power(dtpm);
+ if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) {
+ __dtpm_add_power(dtpm);
+ dtpm->power_limit = dtpm->power_max;
+ }
pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
dtpm->zone.name, dtpm->power_min, dtpm->power_max);
@@ -465,9 +461,9 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
return 0;
}
-static int __init dtpm_init(void)
+static int __init init_dtpm(void)
{
- struct dtpm_descr **dtpm_descr;
+ struct dtpm_descr *dtpm_descr;
pct = powercap_register_control_type(NULL, "dtpm", NULL);
if (IS_ERR(pct)) {
@@ -476,8 +472,8 @@ static int __init dtpm_init(void)
}
for_each_dtpm_table(dtpm_descr)
- (*dtpm_descr)->init(*dtpm_descr);
+ dtpm_descr->init();
return 0;
}
-late_initcall(dtpm_init);
+late_initcall(init_dtpm);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 51c366938acd..44faa3a74db6 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -14,6 +14,8 @@
* The CPU hotplug is supported and the power numbers will be updated
* if a CPU is hot plugged / unplugged.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/cpuhotplug.h>
@@ -23,66 +25,29 @@
#include <linux/slab.h>
#include <linux/units.h>
-static struct dtpm *__parent;
-
-static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
-
struct dtpm_cpu {
+ struct dtpm dtpm;
struct freq_qos_request qos_req;
int cpu;
};
-/*
- * When a new CPU is inserted at hotplug or boot time, add the power
- * contribution and update the dtpm tree.
- */
-static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
-{
- u64 power_min, power_max;
-
- power_min = em->table[0].power;
- power_min *= MICROWATT_PER_MILLIWATT;
- power_min += dtpm->power_min;
+static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu);
- power_max = em->table[em->nr_perf_states - 1].power;
- power_max *= MICROWATT_PER_MILLIWATT;
- power_max += dtpm->power_max;
-
- return dtpm_update_power(dtpm, power_min, power_max);
-}
-
-/*
- * When a CPU is unplugged, remove its power contribution from the
- * dtpm tree.
- */
-static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
+static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm)
{
- u64 power_min, power_max;
-
- power_min = em->table[0].power;
- power_min *= MICROWATT_PER_MILLIWATT;
- power_min = dtpm->power_min - power_min;
-
- power_max = em->table[em->nr_perf_states - 1].power;
- power_max *= MICROWATT_PER_MILLIWATT;
- power_max = dtpm->power_max - power_max;
-
- return dtpm_update_power(dtpm, power_min, power_max);
+ return container_of(dtpm, struct dtpm_cpu, dtpm);
}
static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
{
- struct dtpm_cpu *dtpm_cpu = dtpm->private;
- struct em_perf_domain *pd;
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
struct cpumask cpus;
unsigned long freq;
u64 power;
int i, nr_cpus;
- pd = em_cpu_get(dtpm_cpu->cpu);
-
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
-
nr_cpus = cpumask_weight(&cpus);
for (i = 0; i < pd->nr_perf_states; i++) {
@@ -103,34 +68,88 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
return power_limit;
}
+static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
+{
+ unsigned long max = 0, sum_util = 0;
+ int cpu;
+
+ for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
+
+ /*
+ * The capacity is the same for all CPUs belonging to
+ * the same perf domain, so a single call to
+ * arch_scale_cpu_capacity() is enough. However, we
+ * need the CPU parameter to be initialized by the
+ * loop, so the call ends up in this block.
+ *
+ * We can initialize 'max' with a cpumask_first() call
+ * before the loop but the bits computation is not
+ * worth given the arch_scale_cpu_capacity() just
+ * returns a value where the resulting assembly code
+ * will be optimized by the compiler.
+ */
+ max = arch_scale_cpu_capacity(cpu);
+ sum_util += sched_cpu_util(cpu, max);
+ }
+
+ /*
+ * In the improbable case where all the CPUs of the perf
+ * domain are offline, 'max' will be zero and will lead to an
+ * illegal operation with a zero division.
+ */
+ return max ? (power * ((sum_util << 10) / max)) >> 10 : 0;
+}
+
static u64 get_pd_power_uw(struct dtpm *dtpm)
{
- struct dtpm_cpu *dtpm_cpu = dtpm->private;
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *pd;
- struct cpumask cpus;
+ struct cpumask *pd_mask;
unsigned long freq;
- int i, nr_cpus;
+ int i;
pd = em_cpu_get(dtpm_cpu->cpu);
+
+ pd_mask = em_span_cpus(pd);
+
freq = cpufreq_quick_get(dtpm_cpu->cpu);
- cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
- nr_cpus = cpumask_weight(&cpus);
for (i = 0; i < pd->nr_perf_states; i++) {
if (pd->table[i].frequency < freq)
continue;
- return pd->table[i].power *
- MICROWATT_PER_MILLIWATT * nr_cpus;
+ return scale_pd_power_uw(pd_mask, pd->table[i].power *
+ MICROWATT_PER_MILLIWATT);
}
return 0;
}
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
+ struct cpumask cpus;
+ int nr_cpus;
+
+ cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
+ nr_cpus = cpumask_weight(&cpus);
+
+ dtpm->power_min = em->table[0].power;
+ dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+ dtpm->power_min *= nr_cpus;
+
+ dtpm->power_max = em->table[em->nr_perf_states - 1].power;
+ dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+ dtpm->power_max *= nr_cpus;
+
+ return 0;
+}
+
static void pd_release(struct dtpm *dtpm)
{
- struct dtpm_cpu *dtpm_cpu = dtpm->private;
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
if (freq_qos_request_active(&dtpm_cpu->qos_req))
freq_qos_remove_request(&dtpm_cpu->qos_req);
@@ -139,44 +158,28 @@ static void pd_release(struct dtpm *dtpm)
}
static struct dtpm_ops dtpm_ops = {
- .set_power_uw = set_pd_power_limit,
- .get_power_uw = get_pd_power_uw,
- .release = pd_release,
+ .set_power_uw = set_pd_power_limit,
+ .get_power_uw = get_pd_power_uw,
+ .update_power_uw = update_pd_power_uw,
+ .release = pd_release,
};
static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
{
- struct cpufreq_policy *policy;
struct em_perf_domain *pd;
- struct dtpm *dtpm;
-
- policy = cpufreq_cpu_get(cpu);
-
- if (!policy)
- return 0;
+ struct dtpm_cpu *dtpm_cpu;
pd = em_cpu_get(cpu);
if (!pd)
return -EINVAL;
- dtpm = per_cpu(dtpm_per_cpu, cpu);
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
- power_sub(dtpm, pd);
-
- if (cpumask_weight(policy->cpus) != 1)
- return 0;
-
- for_each_cpu(cpu, policy->related_cpus)
- per_cpu(dtpm_per_cpu, cpu) = NULL;
-
- dtpm_unregister(dtpm);
-
- return 0;
+ return dtpm_update_power(&dtpm_cpu->dtpm);
}
static int cpuhp_dtpm_cpu_online(unsigned int cpu)
{
- struct dtpm *dtpm;
struct dtpm_cpu *dtpm_cpu;
struct cpufreq_policy *policy;
struct em_perf_domain *pd;
@@ -184,7 +187,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
int ret = -ENOMEM;
policy = cpufreq_cpu_get(cpu);
-
if (!policy)
return 0;
@@ -192,66 +194,82 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
if (!pd)
return -EINVAL;
- dtpm = per_cpu(dtpm_per_cpu, cpu);
- if (dtpm)
- return power_add(dtpm, pd);
-
- dtpm = dtpm_alloc(&dtpm_ops);
- if (!dtpm)
- return -EINVAL;
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+ if (dtpm_cpu)
+ return dtpm_update_power(&dtpm_cpu->dtpm);
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
if (!dtpm_cpu)
- goto out_kfree_dtpm;
+ return -ENOMEM;
- dtpm->private = dtpm_cpu;
+ dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
dtpm_cpu->cpu = cpu;
for_each_cpu(cpu, policy->related_cpus)
- per_cpu(dtpm_per_cpu, cpu) = dtpm;
+ per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu;
- sprintf(name, "cpu%d", dtpm_cpu->cpu);
+ snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
- ret = dtpm_register(name, dtpm, __parent);
+ ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
if (ret)
goto out_kfree_dtpm_cpu;
- ret = power_add(dtpm, pd);
- if (ret)
- goto out_dtpm_unregister;
-
ret = freq_qos_add_request(&policy->constraints,
&dtpm_cpu->qos_req, FREQ_QOS_MAX,
pd->table[pd->nr_perf_states - 1].frequency);
if (ret)
- goto out_power_sub;
+ goto out_dtpm_unregister;
return 0;
-out_power_sub:
- power_sub(dtpm, pd);
-
out_dtpm_unregister:
- dtpm_unregister(dtpm);
+ dtpm_unregister(&dtpm_cpu->dtpm);
dtpm_cpu = NULL;
- dtpm = NULL;
out_kfree_dtpm_cpu:
for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = NULL;
kfree(dtpm_cpu);
-out_kfree_dtpm:
- kfree(dtpm);
return ret;
}
-int dtpm_register_cpu(struct dtpm *parent)
+static int __init dtpm_cpu_init(void)
{
- __parent = parent;
+ int ret;
+
+ /*
+ * The callbacks at CPU hotplug time are calling
+ * dtpm_update_power() which in turns calls update_pd_power().
+ *
+ * The function update_pd_power() uses the online mask to
+ * figure out the power consumption limits.
+ *
+ * At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU
+ * online mask when the cpuhp_dtpm_cpu_online function is
+ * called, but the CPU is still in the online mask for the
+ * tear down callback. So the power can not be updated when
+ * the CPU is unplugged.
+ *
+ * At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as
+ * above. The CPU online mask is not up to date when the CPU
+ * is plugged in.
+ *
+ * For this reason, we need to call the online and offline
+ * callbacks at different moments when the CPU online mask is
+ * consistent with the power numbers we want to update.
+ */
+ ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
+ NULL, cpuhp_dtpm_cpu_offline);
+ if (ret < 0)
+ return ret;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
+ cpuhp_dtpm_cpu_online, NULL);
+ if (ret < 0)
+ return ret;
- return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
- "dtpm_cpu:online",
- cpuhp_dtpm_cpu_online,
- cpuhp_dtpm_cpu_offline);
+ return 0;
}
+
+DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9839dee348f6..1ab29e61b078 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -119,6 +119,13 @@ struct cpufreq_policy {
bool strict_target;
/*
+ * Set if inefficient frequencies were found in the frequency table.
+ * This indicates if the relation flag CPUFREQ_RELATION_E can be
+ * honored.
+ */
+ bool efficiencies_available;
+
+ /*
* Preferred average time interval between consecutive invocations of
* the driver to set the frequency for this policy. To be set by the
* scaling driver (0, which is the default, means no preference).
@@ -273,6 +280,12 @@ static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy
#define CPUFREQ_RELATION_L 0 /* lowest frequency at or above target */
#define CPUFREQ_RELATION_H 1 /* highest frequency below or at target */
#define CPUFREQ_RELATION_C 2 /* closest frequency to target */
+/* relation flags */
+#define CPUFREQ_RELATION_E BIT(2) /* Get if possible an efficient frequency */
+
+#define CPUFREQ_RELATION_LE (CPUFREQ_RELATION_L | CPUFREQ_RELATION_E)
+#define CPUFREQ_RELATION_HE (CPUFREQ_RELATION_H | CPUFREQ_RELATION_E)
+#define CPUFREQ_RELATION_CE (CPUFREQ_RELATION_C | CPUFREQ_RELATION_E)
struct freq_attr {
struct attribute attr;
@@ -627,9 +640,11 @@ struct cpufreq_governor *cpufreq_fallback_governor(void);
static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy)
{
if (policy->max < policy->cur)
- __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+ __cpufreq_driver_target(policy, policy->max,
+ CPUFREQ_RELATION_HE);
else if (policy->min > policy->cur)
- __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
+ __cpufreq_driver_target(policy, policy->min,
+ CPUFREQ_RELATION_LE);
}
/* Governor attribute set */
@@ -660,10 +675,11 @@ struct governor_attr {
*********************************************************************/
/* Special Values of .frequency field */
-#define CPUFREQ_ENTRY_INVALID ~0u
-#define CPUFREQ_TABLE_END ~1u
+#define CPUFREQ_ENTRY_INVALID ~0u
+#define CPUFREQ_TABLE_END ~1u
/* Special Values of .flags field */
-#define CPUFREQ_BOOST_FREQ (1 << 0)
+#define CPUFREQ_BOOST_FREQ (1 << 0)
+#define CPUFREQ_INEFFICIENT_FREQ (1 << 1)
struct cpufreq_frequency_table {
unsigned int flags;
@@ -740,6 +756,22 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
continue; \
else
+/**
+ * cpufreq_for_each_efficient_entry_idx - iterate with index over a cpufreq
+ * frequency_table excluding CPUFREQ_ENTRY_INVALID and
+ * CPUFREQ_INEFFICIENT_FREQ frequencies.
+ * @pos: the &struct cpufreq_frequency_table to use as a loop cursor.
+ * @table: the &struct cpufreq_frequency_table to iterate over.
+ * @idx: the table entry currently being processed.
+ * @efficiencies: set to true to only iterate over efficient frequencies.
+ */
+
+#define cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) \
+ cpufreq_for_each_valid_entry_idx(pos, table, idx) \
+ if (efficiencies && (pos->flags & CPUFREQ_INEFFICIENT_FREQ)) \
+ continue; \
+ else
+
int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table);
@@ -764,14 +796,15 @@ bool policy_has_boost_freq(struct cpufreq_policy *policy);
/* Find lowest freq at or above target in a table in ascending order */
static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
struct cpufreq_frequency_table *table = policy->freq_table;
struct cpufreq_frequency_table *pos;
unsigned int freq;
int idx, best = -1;
- cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+ cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
freq = pos->frequency;
if (freq >= target_freq)
@@ -785,14 +818,15 @@ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
/* Find lowest freq at or above target in a table in descending order */
static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
struct cpufreq_frequency_table *table = policy->freq_table;
struct cpufreq_frequency_table *pos;
unsigned int freq;
int idx, best = -1;
- cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+ cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
freq = pos->frequency;
if (freq == target_freq)
@@ -815,26 +849,30 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy,
/* Works only on sorted freq-tables */
static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
target_freq = clamp_val(target_freq, policy->min, policy->max);
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
- return cpufreq_table_find_index_al(policy, target_freq);
+ return cpufreq_table_find_index_al(policy, target_freq,
+ efficiencies);
else
- return cpufreq_table_find_index_dl(policy, target_freq);
+ return cpufreq_table_find_index_dl(policy, target_freq,
+ efficiencies);
}
/* Find highest freq at or below target in a table in ascending order */
static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
struct cpufreq_frequency_table *table = policy->freq_table;
struct cpufreq_frequency_table *pos;
unsigned int freq;
int idx, best = -1;
- cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+ cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
freq = pos->frequency;
if (freq == target_freq)
@@ -857,14 +895,15 @@ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
/* Find highest freq at or below target in a table in descending order */
static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
struct cpufreq_frequency_table *table = policy->freq_table;
struct cpufreq_frequency_table *pos;
unsigned int freq;
int idx, best = -1;
- cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+ cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
freq = pos->frequency;
if (freq <= target_freq)
@@ -878,26 +917,30 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy,
/* Works only on sorted freq-tables */
static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
target_freq = clamp_val(target_freq, policy->min, policy->max);
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
- return cpufreq_table_find_index_ah(policy, target_freq);
+ return cpufreq_table_find_index_ah(policy, target_freq,
+ efficiencies);
else
- return cpufreq_table_find_index_dh(policy, target_freq);
+ return cpufreq_table_find_index_dh(policy, target_freq,
+ efficiencies);
}
/* Find closest freq to target in a table in ascending order */
static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
struct cpufreq_frequency_table *table = policy->freq_table;
struct cpufreq_frequency_table *pos;
unsigned int freq;
int idx, best = -1;
- cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+ cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
freq = pos->frequency;
if (freq == target_freq)
@@ -924,14 +967,15 @@ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
/* Find closest freq to target in a table in descending order */
static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
struct cpufreq_frequency_table *table = policy->freq_table;
struct cpufreq_frequency_table *pos;
unsigned int freq;
int idx, best = -1;
- cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+ cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
freq = pos->frequency;
if (freq == target_freq)
@@ -958,35 +1002,58 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy,
/* Works only on sorted freq-tables */
static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
- unsigned int target_freq)
+ unsigned int target_freq,
+ bool efficiencies)
{
target_freq = clamp_val(target_freq, policy->min, policy->max);
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
- return cpufreq_table_find_index_ac(policy, target_freq);
+ return cpufreq_table_find_index_ac(policy, target_freq,
+ efficiencies);
else
- return cpufreq_table_find_index_dc(policy, target_freq);
+ return cpufreq_table_find_index_dc(policy, target_freq,
+ efficiencies);
}
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
+ bool efficiencies = policy->efficiencies_available &&
+ (relation & CPUFREQ_RELATION_E);
+ int idx;
+
+ /* cpufreq_table_index_unsorted() has no use for this flag anyway */
+ relation &= ~CPUFREQ_RELATION_E;
+
if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))
return cpufreq_table_index_unsorted(policy, target_freq,
relation);
-
+retry:
switch (relation) {
case CPUFREQ_RELATION_L:
- return cpufreq_table_find_index_l(policy, target_freq);
+ idx = cpufreq_table_find_index_l(policy, target_freq,
+ efficiencies);
+ break;
case CPUFREQ_RELATION_H:
- return cpufreq_table_find_index_h(policy, target_freq);
+ idx = cpufreq_table_find_index_h(policy, target_freq,
+ efficiencies);
+ break;
case CPUFREQ_RELATION_C:
- return cpufreq_table_find_index_c(policy, target_freq);
+ idx = cpufreq_table_find_index_c(policy, target_freq,
+ efficiencies);
+ break;
default:
WARN_ON_ONCE(1);
return 0;
}
+
+ if (idx < 0 && efficiencies) {
+ efficiencies = false;
+ goto retry;
+ }
+
+ return idx;
}
static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
@@ -1003,6 +1070,37 @@ static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy
return count;
}
+/**
+ * cpufreq_table_set_inefficient() - Mark a frequency as inefficient
+ * @policy: the &struct cpufreq_policy containing the inefficient frequency
+ * @frequency: the inefficient frequency
+ *
+ * The &struct cpufreq_policy must use a sorted frequency table
+ *
+ * Return: %0 on success or a negative errno code
+ */
+
+static inline int
+cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
+ unsigned int frequency)
+{
+ struct cpufreq_frequency_table *pos;
+
+ /* Not supported */
+ if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)
+ return -EINVAL;
+
+ cpufreq_for_each_valid_entry(pos, policy->freq_table) {
+ if (pos->frequency == frequency) {
+ pos->flags |= CPUFREQ_INEFFICIENT_FREQ;
+ policy->efficiencies_available = true;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
static inline int parse_perf_domain(int cpu, const char *list_name,
const char *cell_name)
{
@@ -1071,6 +1169,13 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
return false;
}
+static inline int
+cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
+ unsigned int frequency)
+{
+ return -EINVAL;
+}
+
static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
const char *cell_name, struct cpumask *cpumask)
{
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 991911048857..773c83730906 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -99,6 +99,7 @@ enum cpuhp_state {
CPUHP_LUSTRE_CFS_DEAD,
CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
CPUHP_PADATA_DEAD,
+ CPUHP_AP_DTPM_CPU_DEAD,
CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE,
@@ -246,7 +247,6 @@ enum cpuhp_state {
CPUHP_AP_MM_DEMOTION_ONLINE,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
- CPUHP_AP_DTPM_CPU_ONLINE,
CPUHP_AP_ACTIVE,
CPUHP_ONLINE,
};
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index e80a332e3d8a..2890f6370eb9 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -23,34 +23,32 @@ struct dtpm {
u64 power_max;
u64 power_min;
int weight;
- void *private;
};
struct dtpm_ops {
u64 (*set_power_uw)(struct dtpm *, u64);
u64 (*get_power_uw)(struct dtpm *);
+ int (*update_power_uw)(struct dtpm *);
void (*release)(struct dtpm *);
};
-struct dtpm_descr;
-
-typedef int (*dtpm_init_t)(struct dtpm_descr *);
+typedef int (*dtpm_init_t)(void);
struct dtpm_descr {
- struct dtpm *parent;
- const char *name;
dtpm_init_t init;
};
/* Init section thermal table */
-extern struct dtpm_descr *__dtpm_table[];
-extern struct dtpm_descr *__dtpm_table_end[];
+extern struct dtpm_descr __dtpm_table[];
+extern struct dtpm_descr __dtpm_table_end[];
-#define DTPM_TABLE_ENTRY(name) \
- static typeof(name) *__dtpm_table_entry_##name \
- __used __section("__dtpm_table") = &name
+#define DTPM_TABLE_ENTRY(name, __init) \
+ static struct dtpm_descr __dtpm_table_entry_##name \
+ __used __section("__dtpm_table") = { \
+ .init = __init, \
+ }
-#define DTPM_DECLARE(name) DTPM_TABLE_ENTRY(name)
+#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init)
#define for_each_dtpm_table(__dtpm) \
for (__dtpm = __dtpm_table; \
@@ -62,11 +60,11 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
return container_of(zone, struct dtpm, zone);
}
-int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max);
+int dtpm_update_power(struct dtpm *dtpm);
int dtpm_release_zone(struct powercap_zone *pcz);
-struct dtpm *dtpm_alloc(struct dtpm_ops *ops);
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops);
void dtpm_unregister(struct dtpm *dtpm);
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 39dcadd492b5..6377adc3b78d 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -17,19 +17,30 @@
* device). It can be a total power: static and dynamic.
* @cost: The cost coefficient associated with this level, used during
* energy calculation. Equal to: power * max_frequency / frequency
+ * @flags: see "em_perf_state flags" description below.
*/
struct em_perf_state {
unsigned long frequency;
unsigned long power;
unsigned long cost;
+ unsigned long flags;
};
+/*
+ * em_perf_state flags:
+ *
+ * EM_PERF_STATE_INEFFICIENT: The performance state is inefficient. There is
+ * in this em_perf_domain, another performance state with a higher frequency
+ * but a lower or equal power cost. Such inefficient states are ignored when
+ * using em_pd_get_efficient_*() functions.
+ */
+#define EM_PERF_STATE_INEFFICIENT BIT(0)
+
/**
* struct em_perf_domain - Performance domain
* @table: List of performance states, in ascending order
* @nr_perf_states: Number of performance states
- * @milliwatts: Flag indicating the power values are in milli-Watts
- * or some other scale.
+ * @flags: See "em_perf_domain flags"
* @cpus: Cpumask covering the CPUs of the domain. It's here
* for performance reasons to avoid potential cache
* misses during energy calculations in the scheduler
@@ -44,10 +55,22 @@ struct em_perf_state {
struct em_perf_domain {
struct em_perf_state *table;
int nr_perf_states;
- int milliwatts;
+ unsigned long flags;
unsigned long cpus[];
};
+/*
+ * em_perf_domain flags:
+ *
+ * EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some
+ * other scale.
+ *
+ * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
+ * energy consumption.
+ */
+#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
+#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
+
#define em_span_cpus(em) (to_cpumask((em)->cpus))
#ifdef CONFIG_ENERGY_MODEL
@@ -102,6 +125,37 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
void em_dev_unregister_perf_domain(struct device *dev);
/**
+ * em_pd_get_efficient_state() - Get an efficient performance state from the EM
+ * @pd : Performance domain for which we want an efficient frequency
+ * @freq : Frequency to map with the EM
+ *
+ * It is called from the scheduler code quite frequently and as a consequence
+ * doesn't implement any check.
+ *
+ * Return: An efficient performance state, high enough to meet @freq
+ * requirement.
+ */
+static inline
+struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd,
+ unsigned long freq)
+{
+ struct em_perf_state *ps;
+ int i;
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+ ps = &pd->table[i];
+ if (ps->frequency >= freq) {
+ if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
+ ps->flags & EM_PERF_STATE_INEFFICIENT)
+ continue;
+ break;
+ }
+ }
+
+ return ps;
+}
+
+/**
* em_cpu_energy() - Estimates the energy consumed by the CPUs of a
* performance domain
* @pd : performance domain for which energy has to be estimated
@@ -123,7 +177,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
{
unsigned long freq, scale_cpu;
struct em_perf_state *ps;
- int i, cpu;
+ int cpu;
if (!sum_util)
return 0;
@@ -148,11 +202,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* Find the lowest performance state of the Energy Model above the
* requested frequency.
*/
- for (i = 0; i < pd->nr_perf_states; i++) {
- ps = &pd->table[i];
- if (ps->frequency >= freq)
- break;
- }
+ ps = em_pd_get_efficient_state(pd, freq);
/*
* The capacity of a CPU in the domain at the performance state (ps)
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index a332ccd829e2..0153b0ca7b23 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -2,7 +2,7 @@
/*
* Energy Model of devices
*
- * Copyright (c) 2018-2020, Arm ltd.
+ * Copyright (c) 2018-2021, Arm ltd.
* Written by: Quentin Perret, Arm ltd.
* Improvements provided by: Lukasz Luba, Arm ltd.
*/
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "energy_model: " fmt
#include <linux/cpu.h>
+#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/energy_model.h>
@@ -42,6 +43,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
debugfs_create_ulong("power", 0444, d, &ps->power);
debugfs_create_ulong("cost", 0444, d, &ps->cost);
+ debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
}
static int em_debug_cpus_show(struct seq_file *s, void *unused)
@@ -55,7 +57,8 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
static int em_debug_units_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
- char *units = pd->milliwatts ? "milliWatts" : "bogoWatts";
+ char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
+ "milliWatts" : "bogoWatts";
seq_printf(s, "%s\n", units);
@@ -63,6 +66,17 @@ static int em_debug_units_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_units);
+static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
+{
+ struct em_perf_domain *pd = s->private;
+ int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
+
+ seq_printf(s, "%d\n", enabled);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
+
static void em_debug_create_pd(struct device *dev)
{
struct dentry *d;
@@ -76,6 +90,8 @@ static void em_debug_create_pd(struct device *dev)
&em_debug_cpus_fops);
debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
+ debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
+ &em_debug_skip_inefficiencies_fops);
/* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
@@ -107,8 +123,7 @@ static void em_debug_remove_pd(struct device *dev) {}
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
int nr_states, struct em_data_callback *cb)
{
- unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
- unsigned long power, freq, prev_freq = 0;
+ unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
int i, ret;
u64 fmax;
@@ -153,27 +168,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
table[i].power = power;
table[i].frequency = prev_freq = freq;
-
- /*
- * The hertz/watts efficiency ratio should decrease as the
- * frequency grows on sane platforms. But this isn't always
- * true in practice so warn the user if a higher OPP is more
- * power efficient than a lower one.
- */
- opp_eff = freq / power;
- if (opp_eff >= prev_opp_eff)
- dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n",
- i, i - 1);
- prev_opp_eff = opp_eff;
}
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
- for (i = 0; i < nr_states; i++) {
+ for (i = nr_states - 1; i >= 0; i--) {
unsigned long power_res = em_scale_power(table[i].power);
table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
+ if (table[i].cost >= prev_cost) {
+ table[i].flags = EM_PERF_STATE_INEFFICIENT;
+ dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
+ table[i].frequency);
+ } else {
+ prev_cost = table[i].cost;
+ }
}
pd->table = table;
@@ -222,6 +232,43 @@ static int em_create_pd(struct device *dev, int nr_states,
return 0;
}
+static void em_cpufreq_update_efficiencies(struct device *dev)
+{
+ struct em_perf_domain *pd = dev->em_pd;
+ struct em_perf_state *table;
+ struct cpufreq_policy *policy;
+ int found = 0;
+ int i;
+
+ if (!_is_cpu_device(dev) || !pd)
+ return;
+
+ policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd)));
+ if (!policy) {
+ dev_warn(dev, "EM: Access to CPUFreq policy failed");
+ return;
+ }
+
+ table = pd->table;
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+ if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
+ continue;
+
+ if (!cpufreq_table_set_inefficient(policy, table[i].frequency))
+ found++;
+ }
+
+ if (!found)
+ return;
+
+ /*
+ * Efficiencies have been installed in CPUFreq, inefficient frequencies
+ * will be skipped. The EM can do the same.
+ */
+ pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
+}
+
/**
* em_pd_get() - Return the performance domain for a device
* @dev : Device to find the performance domain for
@@ -335,7 +382,10 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
if (ret)
goto unlock;
- dev->em_pd->milliwatts = milliwatts;
+ if (milliwatts)
+ dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
+
+ em_cpufreq_update_efficiencies(dev);
em_debug_create_pd(dev);
dev_info(dev, "EM: created perf domain\n");