diff options
Diffstat (limited to 'drivers/thermal/gov_power_allocator.c')
| -rw-r--r-- | drivers/thermal/gov_power_allocator.c | 801 |
1 files changed, 801 insertions, 0 deletions
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c new file mode 100644 index 000000000000..0d9f636c80f4 --- /dev/null +++ b/drivers/thermal/gov_power_allocator.c @@ -0,0 +1,801 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A power allocator to manage temperature + * + * Copyright (C) 2014 ARM Ltd. + * + */ + +#define pr_fmt(fmt) "Power allocator: " fmt + +#include <linux/slab.h> +#include <linux/thermal.h> + +#define CREATE_TRACE_POINTS +#include "thermal_trace_ipa.h" + +#include "thermal_core.h" + +#define FRAC_BITS 10 +#define int_to_frac(x) ((x) << FRAC_BITS) +#define frac_to_int(x) ((x) >> FRAC_BITS) + +/** + * mul_frac() - multiply two fixed-point numbers + * @x: first multiplicand + * @y: second multiplicand + * + * Return: the result of multiplying two fixed-point numbers. The + * result is also a fixed-point number. + */ +static inline s64 mul_frac(s64 x, s64 y) +{ + return (x * y) >> FRAC_BITS; +} + +/** + * div_frac() - divide two fixed-point numbers + * @x: the dividend + * @y: the divisor + * + * Return: the result of dividing two fixed-point numbers. The + * result is also a fixed-point number. + */ +static inline s64 div_frac(s64 x, s64 y) +{ + return div_s64(x << FRAC_BITS, y); +} + +/** + * struct power_actor - internal power information for power actor + * @req_power: requested power value (not weighted) + * @max_power: max allocatable power for this actor + * @granted_power: granted power for this actor + * @extra_actor_power: extra power that this actor can receive + * @weighted_req_power: weighted requested power as input to IPA + */ +struct power_actor { + u32 req_power; + u32 max_power; + u32 granted_power; + u32 extra_actor_power; + u32 weighted_req_power; +}; + +/** + * struct power_allocator_params - parameters for the power allocator governor + * @allocated_tzp: whether we have allocated tzp for this thermal zone and + * it needs to be freed on unbind + * @update_cdevs: whether or not update cdevs on the next run + * @err_integral: accumulated error in the PID controller. + * @prev_err: error in the previous iteration of the PID controller. + * Used to calculate the derivative term. + * @sustainable_power: Sustainable power (heat) that this thermal zone can + * dissipate + * @trip_switch_on: first passive trip point of the thermal zone. The + * governor switches on when this trip point is crossed. + * If the thermal zone only has one passive trip point, + * @trip_switch_on should be NULL. + * @trip_max: last passive trip point of the thermal zone. The + * temperature we are controlling for. + * @total_weight: Sum of all thermal instances weights + * @num_actors: number of cooling devices supporting IPA callbacks + * @buffer_size: internal buffer size, to avoid runtime re-calculation + * @power: buffer for all power actors internal power information + */ +struct power_allocator_params { + bool allocated_tzp; + bool update_cdevs; + s64 err_integral; + s32 prev_err; + u32 sustainable_power; + const struct thermal_trip *trip_switch_on; + const struct thermal_trip *trip_max; + int total_weight; + unsigned int num_actors; + unsigned int buffer_size; + struct power_actor *power; +}; + +static bool power_actor_is_valid(struct thermal_instance *instance) +{ + return cdev_is_power_actor(instance->cdev); +} + +/** + * estimate_sustainable_power() - Estimate the sustainable power of a thermal zone + * @tz: thermal zone we are operating in + * + * For thermal zones that don't provide a sustainable_power in their + * thermal_zone_params, estimate one. Calculate it using the minimum + * power of all the cooling devices as that gives a valid value that + * can give some degree of functionality. For optimal performance of + * this governor, provide a sustainable_power in the thermal zone's + * thermal_zone_params. + */ +static u32 estimate_sustainable_power(struct thermal_zone_device *tz) +{ + struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip_desc *td = trip_to_trip_desc(params->trip_max); + struct thermal_cooling_device *cdev; + struct thermal_instance *instance; + u32 sustainable_power = 0; + u32 min_power; + + list_for_each_entry(instance, &td->thermal_instances, trip_node) { + if (!power_actor_is_valid(instance)) + continue; + + cdev = instance->cdev; + if (cdev->ops->state2power(cdev, instance->upper, &min_power)) + continue; + + sustainable_power += min_power; + } + + return sustainable_power; +} + +/** + * estimate_pid_constants() - Estimate the constants for the PID controller + * @tz: thermal zone for which to estimate the constants + * @sustainable_power: sustainable power for the thermal zone + * @trip_switch_on: trip point for the switch on temperature + * @control_temp: target temperature for the power allocator governor + * + * This function is used to update the estimation of the PID + * controller constants in struct thermal_zone_parameters. + */ +static void estimate_pid_constants(struct thermal_zone_device *tz, + u32 sustainable_power, + const struct thermal_trip *trip_switch_on, + int control_temp) +{ + u32 temperature_threshold = control_temp; + s32 k_i; + + if (trip_switch_on) + temperature_threshold -= trip_switch_on->temperature; + + /* + * estimate_pid_constants() tries to find appropriate default + * values for thermal zones that don't provide them. If a + * system integrator has configured a thermal zone with two + * passive trip points at the same temperature, that person + * hasn't put any effort to set up the thermal zone properly + * so just give up. + */ + if (!temperature_threshold) + return; + + tz->tzp->k_po = int_to_frac(sustainable_power) / + temperature_threshold; + + tz->tzp->k_pu = int_to_frac(2 * sustainable_power) / + temperature_threshold; + + k_i = tz->tzp->k_pu / 10; + tz->tzp->k_i = k_i > 0 ? k_i : 1; + + /* + * The default for k_d and integral_cutoff is 0, so we can + * leave them as they are. + */ +} + +/** + * get_sustainable_power() - Get the right sustainable power + * @tz: thermal zone for which to estimate the constants + * @params: parameters for the power allocator governor + * @control_temp: target temperature for the power allocator governor + * + * This function is used for getting the proper sustainable power value based + * on variables which might be updated by the user sysfs interface. If that + * happen the new value is going to be estimated and updated. It is also used + * after thermal zone binding, where the initial values where set to 0. + */ +static u32 get_sustainable_power(struct thermal_zone_device *tz, + struct power_allocator_params *params, + int control_temp) +{ + u32 sustainable_power; + + if (!tz->tzp->sustainable_power) + sustainable_power = estimate_sustainable_power(tz); + else + sustainable_power = tz->tzp->sustainable_power; + + /* Check if it's init value 0 or there was update via sysfs */ + if (sustainable_power != params->sustainable_power) { + estimate_pid_constants(tz, sustainable_power, + params->trip_switch_on, control_temp); + + /* Do the estimation only once and make available in sysfs */ + tz->tzp->sustainable_power = sustainable_power; + params->sustainable_power = sustainable_power; + } + + return sustainable_power; +} + +/** + * pid_controller() - PID controller + * @tz: thermal zone we are operating in + * @control_temp: the target temperature in millicelsius + * @max_allocatable_power: maximum allocatable power for this thermal zone + * + * This PID controller increases the available power budget so that the + * temperature of the thermal zone gets as close as possible to + * @control_temp and limits the power if it exceeds it. k_po is the + * proportional term when we are overshooting, k_pu is the + * proportional term when we are undershooting. integral_cutoff is a + * threshold below which we stop accumulating the error. The + * accumulated error is only valid if the requested power will make + * the system warmer. If the system is mostly idle, there's no point + * in accumulating positive error. + * + * Return: The power budget for the next period. + */ +static u32 pid_controller(struct thermal_zone_device *tz, + int control_temp, + u32 max_allocatable_power) +{ + struct power_allocator_params *params = tz->governor_data; + s64 p, i, d, power_range; + s32 err, max_power_frac; + u32 sustainable_power; + + max_power_frac = int_to_frac(max_allocatable_power); + + sustainable_power = get_sustainable_power(tz, params, control_temp); + + err = control_temp - tz->temperature; + err = int_to_frac(err); + + /* Calculate the proportional term */ + p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err); + + /* + * Calculate the integral term + * + * if the error is less than cut off allow integration (but + * the integral is limited to max power) + */ + i = mul_frac(tz->tzp->k_i, params->err_integral); + + if (err < int_to_frac(tz->tzp->integral_cutoff)) { + s64 i_next = i + mul_frac(tz->tzp->k_i, err); + + if (abs(i_next) < max_power_frac) { + i = i_next; + params->err_integral += err; + } + } + + /* + * Calculate the derivative term + * + * We do err - prev_err, so with a positive k_d, a decreasing + * error (i.e. driving closer to the line) results in less + * power being applied, slowing down the controller) + */ + d = mul_frac(tz->tzp->k_d, err - params->prev_err); + d = div_frac(d, jiffies_to_msecs(tz->passive_delay_jiffies)); + params->prev_err = err; + + power_range = p + i + d; + + /* feed-forward the known sustainable dissipatable power */ + power_range = sustainable_power + frac_to_int(power_range); + + power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power); + + trace_thermal_power_allocator_pid(tz, frac_to_int(err), + frac_to_int(params->err_integral), + frac_to_int(p), frac_to_int(i), + frac_to_int(d), power_range); + + return power_range; +} + +/** + * power_actor_set_power() - limit the maximum power a cooling device consumes + * @cdev: pointer to &thermal_cooling_device + * @instance: thermal instance to update + * @power: the power in milliwatts + * + * Set the cooling device to consume at most @power milliwatts. The limit is + * expected to be a cap at the maximum power consumption. + * + * Return: 0 on success, -EINVAL if the cooling device does not + * implement the power actor API or -E* for other failures. + */ +static int +power_actor_set_power(struct thermal_cooling_device *cdev, + struct thermal_instance *instance, u32 power) +{ + unsigned long state; + int ret; + + ret = cdev->ops->power2state(cdev, power, &state); + if (ret) + return ret; + + instance->target = clamp_val(state, instance->lower, instance->upper); + + thermal_cdev_update_nocheck(cdev); + + return 0; +} + +/** + * divvy_up_power() - divvy the allocated power between the actors + * @power: buffer for all power actors internal power information + * @num_actors: number of power actors in this thermal zone + * @total_req_power: sum of all weighted requested power for all actors + * @power_range: total allocated power + * + * This function divides the total allocated power (@power_range) + * fairly between the actors. It first tries to give each actor a + * share of the @power_range according to how much power it requested + * compared to the rest of the actors. For example, if only one actor + * requests power, then it receives all the @power_range. If + * three actors each requests 1mW, each receives a third of the + * @power_range. + * + * If any actor received more than their maximum power, then that + * surplus is re-divvied among the actors based on how far they are + * from their respective maximums. + */ +static void divvy_up_power(struct power_actor *power, int num_actors, + u32 total_req_power, u32 power_range) +{ + u32 capped_extra_power = 0; + u32 extra_power = 0; + int i; + + if (!total_req_power) { + /* + * Nobody requested anything, just give everybody + * the maximum power + */ + for (i = 0; i < num_actors; i++) { + struct power_actor *pa = &power[i]; + + pa->granted_power = pa->max_power; + } + + return; + } + + for (i = 0; i < num_actors; i++) { + struct power_actor *pa = &power[i]; + u64 req_range = (u64)pa->weighted_req_power * power_range; + + pa->granted_power = DIV_ROUND_CLOSEST_ULL(req_range, + total_req_power); + + if (pa->granted_power > pa->max_power) { + extra_power += pa->granted_power - pa->max_power; + pa->granted_power = pa->max_power; + } + + pa->extra_actor_power = pa->max_power - pa->granted_power; + capped_extra_power += pa->extra_actor_power; + } + + if (!extra_power || !capped_extra_power) + return; + + /* + * Re-divvy the reclaimed extra among actors based on + * how far they are from the max + */ + extra_power = min(extra_power, capped_extra_power); + + for (i = 0; i < num_actors; i++) { + struct power_actor *pa = &power[i]; + u64 extra_range = pa->extra_actor_power; + + extra_range *= extra_power; + pa->granted_power += DIV_ROUND_CLOSEST_ULL(extra_range, + capped_extra_power); + } +} + +static void allocate_power(struct thermal_zone_device *tz, int control_temp) +{ + struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip_desc *td = trip_to_trip_desc(params->trip_max); + unsigned int num_actors = params->num_actors; + struct power_actor *power = params->power; + struct thermal_cooling_device *cdev; + struct thermal_instance *instance; + u32 total_weighted_req_power = 0; + u32 max_allocatable_power = 0; + u32 total_granted_power = 0; + u32 total_req_power = 0; + u32 power_range, weight; + int i = 0, ret; + + if (!num_actors) + return; + + /* Clean all buffers for new power estimations */ + memset(power, 0, params->buffer_size); + + list_for_each_entry(instance, &td->thermal_instances, trip_node) { + struct power_actor *pa = &power[i]; + + if (!power_actor_is_valid(instance)) + continue; + + cdev = instance->cdev; + + ret = cdev->ops->get_requested_power(cdev, &pa->req_power); + if (ret) + continue; + + if (!params->total_weight) + weight = 1 << FRAC_BITS; + else + weight = instance->weight; + + pa->weighted_req_power = frac_to_int(weight * pa->req_power); + + ret = cdev->ops->state2power(cdev, instance->lower, + &pa->max_power); + if (ret) + continue; + + total_req_power += pa->req_power; + max_allocatable_power += pa->max_power; + total_weighted_req_power += pa->weighted_req_power; + + i++; + } + + power_range = pid_controller(tz, control_temp, max_allocatable_power); + + divvy_up_power(power, num_actors, total_weighted_req_power, + power_range); + + i = 0; + list_for_each_entry(instance, &td->thermal_instances, trip_node) { + struct power_actor *pa = &power[i]; + + if (!power_actor_is_valid(instance)) + continue; + + power_actor_set_power(instance->cdev, instance, + pa->granted_power); + total_granted_power += pa->granted_power; + + trace_thermal_power_actor(tz, i, pa->req_power, + pa->granted_power); + i++; + } + + trace_thermal_power_allocator(tz, total_req_power, total_granted_power, + num_actors, power_range, + max_allocatable_power, tz->temperature, + control_temp - tz->temperature); +} + +/** + * get_governor_trips() - get the two trip points that are key for this governor + * @tz: thermal zone to operate on + * @params: pointer to private data for this governor + * + * The power allocator governor works optimally with two trips points: + * a "switch on" trip point and a "maximum desired temperature". These + * are defined as the first and last passive trip points. + * + * If there is only one trip point, then that's considered to be the + * "maximum desired temperature" trip point and the governor is always + * on. If there are no passive or active trip points, then the + * governor won't do anything. In fact, its throttle function + * won't be called at all. + */ +static void get_governor_trips(struct thermal_zone_device *tz, + struct power_allocator_params *params) +{ + const struct thermal_trip *first_passive = NULL; + const struct thermal_trip *last_passive = NULL; + const struct thermal_trip *last_active = NULL; + const struct thermal_trip_desc *td; + + for_each_trip_desc(tz, td) { + const struct thermal_trip *trip = &td->trip; + + switch (trip->type) { + case THERMAL_TRIP_PASSIVE: + if (!first_passive) { + first_passive = trip; + break; + } + last_passive = trip; + break; + case THERMAL_TRIP_ACTIVE: + last_active = trip; + break; + default: + break; + } + } + + if (last_passive) { + params->trip_switch_on = first_passive; + params->trip_max = last_passive; + } else if (first_passive) { + params->trip_switch_on = NULL; + params->trip_max = first_passive; + } else { + params->trip_switch_on = NULL; + params->trip_max = last_active; + } +} + +static void reset_pid_controller(struct power_allocator_params *params) +{ + params->err_integral = 0; + params->prev_err = 0; +} + +static void allow_maximum_power(struct thermal_zone_device *tz) +{ + struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip_desc *td = trip_to_trip_desc(params->trip_max); + struct thermal_cooling_device *cdev; + struct thermal_instance *instance; + u32 req_power; + + list_for_each_entry(instance, &td->thermal_instances, trip_node) { + if (!power_actor_is_valid(instance)) + continue; + + cdev = instance->cdev; + + instance->target = 0; + scoped_guard(cooling_dev, cdev) { + /* + * Call for updating the cooling devices local stats and + * avoid periods of dozen of seconds when those have not + * been maintained. + */ + cdev->ops->get_requested_power(cdev, &req_power); + + if (params->update_cdevs) + __thermal_cdev_update(cdev); + } + } +} + +/** + * check_power_actors() - Check all cooling devices and warn when they are + * not power actors + * @tz: thermal zone to operate on + * @params: power allocator private data + * + * Check all cooling devices in the @tz and warn every time they are missing + * power actor API. The warning should help to investigate the issue, which + * could be e.g. lack of Energy Model for a given device. + * + * If all of the cooling devices currently attached to @tz implement the power + * actor API, return the number of them (which may be 0, because some cooling + * devices may be attached later). Otherwise, return -EINVAL. + */ +static int check_power_actors(struct thermal_zone_device *tz, + struct power_allocator_params *params) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; + int ret = 0; + + if (!params->trip_max) + return 0; + + td = trip_to_trip_desc(params->trip_max); + + list_for_each_entry(instance, &td->thermal_instances, trip_node) { + if (!cdev_is_power_actor(instance->cdev)) { + dev_warn(&tz->device, "power_allocator: %s is not a power actor\n", + instance->cdev->type); + return -EINVAL; + } + ret++; + } + + return ret; +} + +static int allocate_actors_buffer(struct power_allocator_params *params, + int num_actors) +{ + int ret; + + kfree(params->power); + + /* There might be no cooling devices yet. */ + if (!num_actors) { + ret = 0; + goto clean_state; + } + + params->power = kcalloc(num_actors, sizeof(struct power_actor), + GFP_KERNEL); + if (!params->power) { + ret = -ENOMEM; + goto clean_state; + } + + params->num_actors = num_actors; + params->buffer_size = num_actors * sizeof(struct power_actor); + + return 0; + +clean_state: + params->num_actors = 0; + params->buffer_size = 0; + params->power = NULL; + return ret; +} + +static void power_allocator_update_weight(struct power_allocator_params *params) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; + + if (!params->trip_max) + return; + + td = trip_to_trip_desc(params->trip_max); + + params->total_weight = 0; + list_for_each_entry(instance, &td->thermal_instances, trip_node) + if (power_actor_is_valid(instance)) + params->total_weight += instance->weight; +} + +static void power_allocator_update_tz(struct thermal_zone_device *tz, + enum thermal_notify_event reason) +{ + struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip_desc *td = trip_to_trip_desc(params->trip_max); + struct thermal_instance *instance; + int num_actors = 0; + + switch (reason) { + case THERMAL_TZ_BIND_CDEV: + case THERMAL_TZ_UNBIND_CDEV: + list_for_each_entry(instance, &td->thermal_instances, trip_node) + if (power_actor_is_valid(instance)) + num_actors++; + + if (num_actors != params->num_actors) + allocate_actors_buffer(params, num_actors); + + fallthrough; + case THERMAL_INSTANCE_WEIGHT_CHANGED: + power_allocator_update_weight(params); + break; + default: + break; + } +} + +/** + * power_allocator_bind() - bind the power_allocator governor to a thermal zone + * @tz: thermal zone to bind it to + * + * Initialize the PID controller parameters and bind it to the thermal + * zone. + * + * Return: 0 on success, or -ENOMEM if we ran out of memory, or -EINVAL + * when there are unsupported cooling devices in the @tz. + */ +static int power_allocator_bind(struct thermal_zone_device *tz) +{ + struct power_allocator_params *params; + int ret; + + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + get_governor_trips(tz, params); + + ret = check_power_actors(tz, params); + if (ret < 0) { + dev_warn(&tz->device, "power_allocator: binding failed\n"); + kfree(params); + return ret; + } + + ret = allocate_actors_buffer(params, ret); + if (ret) { + dev_warn(&tz->device, "power_allocator: allocation failed\n"); + kfree(params); + return ret; + } + + if (!tz->tzp) { + tz->tzp = kzalloc(sizeof(*tz->tzp), GFP_KERNEL); + if (!tz->tzp) { + ret = -ENOMEM; + goto free_params; + } + + params->allocated_tzp = true; + } + + if (!tz->tzp->sustainable_power) + dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); + else + params->sustainable_power = tz->tzp->sustainable_power; + + if (params->trip_max) + estimate_pid_constants(tz, tz->tzp->sustainable_power, + params->trip_switch_on, + params->trip_max->temperature); + + reset_pid_controller(params); + + tz->governor_data = params; + + power_allocator_update_weight(params); + + return 0; + +free_params: + kfree(params->power); + kfree(params); + + return ret; +} + +static void power_allocator_unbind(struct thermal_zone_device *tz) +{ + struct power_allocator_params *params = tz->governor_data; + + dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); + + if (params->allocated_tzp) { + kfree(tz->tzp); + tz->tzp = NULL; + } + + kfree(params->power); + kfree(tz->governor_data); + tz->governor_data = NULL; +} + +static void power_allocator_manage(struct thermal_zone_device *tz) +{ + struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip *trip = params->trip_switch_on; + + lockdep_assert_held(&tz->lock); + + if (trip && tz->temperature < trip->temperature) { + reset_pid_controller(params); + allow_maximum_power(tz); + params->update_cdevs = false; + return; + } + + if (!params->trip_max) + return; + + allocate_power(tz, params->trip_max->temperature); + params->update_cdevs = true; +} + +static struct thermal_governor thermal_gov_power_allocator = { + .name = "power_allocator", + .bind_to_tz = power_allocator_bind, + .unbind_from_tz = power_allocator_unbind, + .manage = power_allocator_manage, + .update_tz = power_allocator_update_tz, +}; +THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator); |
