Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c')
| -rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c | 320 |
1 file changed, 320 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c
new file mode 100644
index 000000000000..41003cbcdbfa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: MIT
+#include <linux/clk.h>
+#include <linux/math64.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+
+#include <drm/drm_managed.h>
+
+#include <subdev/clk.h>
+
+#include "nouveau_drv.h"
+#include "nouveau_chan.h"
+#include "priv.h"
+#include "gk20a_devfreq.h"
+#include "gk20a.h"
+#include "gp10b.h"
+
+#define PMU_BUSY_CYCLES_NORM_MAX		1000U
+
+#define PWR_PMU_IDLE_COUNTER_TOTAL		0U
+#define PWR_PMU_IDLE_COUNTER_BUSY		4U
+
+#define PWR_PMU_IDLE_COUNT_REG_OFFSET		0x0010A508U
+#define PWR_PMU_IDLE_COUNT_REG_SIZE		16U
+#define PWR_PMU_IDLE_COUNT_MASK			0x7FFFFFFFU
+#define PWR_PMU_IDLE_COUNT_RESET_VALUE		(0x1U << 31U)
+
+#define PWR_PMU_IDLE_INTR_REG_OFFSET		0x0010A9E8U
+#define PWR_PMU_IDLE_INTR_ENABLE_VALUE		0U
+
+#define PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET	0x0010A9ECU
+#define PWR_PMU_IDLE_INTR_STATUS_MASK		0x00000001U
+#define PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE	0x1U
+
+#define PWR_PMU_IDLE_THRESHOLD_REG_OFFSET	0x0010A8A0U
+#define PWR_PMU_IDLE_THRESHOLD_REG_SIZE		4U
+#define PWR_PMU_IDLE_THRESHOLD_MAX_VALUE	0x7FFFFFFFU
+
+#define PWR_PMU_IDLE_CTRL_REG_OFFSET		0x0010A50CU
+#define PWR_PMU_IDLE_CTRL_REG_SIZE		16U
+#define PWR_PMU_IDLE_CTRL_VALUE_MASK		0x3U
+#define PWR_PMU_IDLE_CTRL_VALUE_BUSY		0x2U
+#define PWR_PMU_IDLE_CTRL_VALUE_ALWAYS		0x3U
+#define PWR_PMU_IDLE_CTRL_FILTER_MASK		(0x1U << 2)
+#define PWR_PMU_IDLE_CTRL_FILTER_DISABLED	0x0U
+
+#define PWR_PMU_IDLE_MASK_REG_OFFSET		0x0010A504U
+#define PWR_PMU_IDLE_MASK_REG_SIZE		16U
+#define PWM_PMU_IDLE_MASK_GR_ENABLED		0x1U
+#define PWM_PMU_IDLE_MASK_CE_2_ENABLED		0x200000U
+
+/**
+ * struct gk20a_devfreq - Device frequency management
+ */
+struct gk20a_devfreq {
+	/** @devfreq: devfreq device. */
+	struct devfreq *devfreq;
+
+	/** @regs: Device registers. */
+	void __iomem *regs;
+
+	/** @gov_data: Governor data. */
+	struct devfreq_simple_ondemand_data gov_data;
+
+	/** @busy_time: Busy time. */
+	ktime_t busy_time;
+
+	/** @total_time: Total time. */
+	ktime_t total_time;
+
+	/** @time_last_update: Last update time. */
+	ktime_t time_last_update;
+};
+
+static struct gk20a_devfreq *dev_to_gk20a_devfreq(struct device *dev)
+{
+	struct nouveau_drm *drm = dev_get_drvdata(dev);
+	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
+	struct nvkm_clk *base = nvkm_clk(subdev);
+
+	switch (drm->nvkm->chipset) {
+	case 0x13b: return gp10b_clk(base)->devfreq;
+	default: return gk20a_clk(base)->devfreq;
+	}
+}
+
+static void gk20a_pmu_init_perfmon_counter(struct gk20a_devfreq *gdevfreq)
+{
+	u32 data;
+
+	// Set pmu idle intr status bit on total counter overflow
+	writel(PWR_PMU_IDLE_INTR_ENABLE_VALUE,
+	       gdevfreq->regs + PWR_PMU_IDLE_INTR_REG_OFFSET);
+
+	writel(PWR_PMU_IDLE_THRESHOLD_MAX_VALUE,
+	       gdevfreq->regs + PWR_PMU_IDLE_THRESHOLD_REG_OFFSET +
+	       (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_THRESHOLD_REG_SIZE));
+
+	// Setup counter for total cycles
+	data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+		     (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
+	data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
+	data |= PWR_PMU_IDLE_CTRL_VALUE_ALWAYS | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
+	writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+	       (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
+
+	// Setup counter for busy cycles
+	writel(PWM_PMU_IDLE_MASK_GR_ENABLED | PWM_PMU_IDLE_MASK_CE_2_ENABLED,
+	       gdevfreq->regs + PWR_PMU_IDLE_MASK_REG_OFFSET +
+	       (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_MASK_REG_SIZE));
+
+	data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+		     (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
+	data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
+	data |= PWR_PMU_IDLE_CTRL_VALUE_BUSY | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
+	writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+	       (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
+}
+
+static u32 gk20a_pmu_read_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
+{
+	u32 ret;
+
+	ret = readl(gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
+		    (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
+
+	return ret & PWR_PMU_IDLE_COUNT_MASK;
+}
+
+static void gk20a_pmu_reset_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
+{
+	writel(PWR_PMU_IDLE_COUNT_RESET_VALUE, gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
+	       (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
+}
+
+static u32 gk20a_pmu_read_idle_intr_status(struct gk20a_devfreq *gdevfreq)
+{
+	u32 ret;
+
+	ret = readl(gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
+
+	return ret & PWR_PMU_IDLE_INTR_STATUS_MASK;
+}
+
+static void gk20a_pmu_clear_idle_intr_status(struct gk20a_devfreq *gdevfreq)
+{
+	writel(PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE,
+	       gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
+}
+
+static void gk20a_devfreq_update_utilization(struct gk20a_devfreq *gdevfreq)
+{
+	ktime_t now, last;
+	u64 busy_cycles, total_cycles;
+	u32 norm, intr_status;
+
+	now = ktime_get();
+	last = gdevfreq->time_last_update;
+	gdevfreq->total_time = ktime_us_delta(now, last);
+
+	busy_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
+	total_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
+	intr_status = gk20a_pmu_read_idle_intr_status(gdevfreq);
+
+	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
+	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
+
+	if (intr_status != 0UL) {
+		norm = PMU_BUSY_CYCLES_NORM_MAX;
+		gk20a_pmu_clear_idle_intr_status(gdevfreq);
+	} else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
+		norm = PMU_BUSY_CYCLES_NORM_MAX;
+	} else {
+		norm = (u32)div64_u64(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX,
+				      total_cycles);
+	}
+
+	gdevfreq->busy_time = div_u64(gdevfreq->total_time * norm, PMU_BUSY_CYCLES_NORM_MAX);
+	gdevfreq->time_last_update = now;
+}
+
+static int gk20a_devfreq_target(struct device *dev, unsigned long *freq,
+				u32 flags)
+{
+	struct nouveau_drm *drm = dev_get_drvdata(dev);
+	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
+	struct nvkm_clk *base = nvkm_clk(subdev);
+	struct nvkm_pstate *pstates = base->func->pstates;
+	int nr_pstates = base->func->nr_pstates;
+	int i, ret;
+
+	for (i = 0; i < nr_pstates - 1; i++)
+		if (pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV >= *freq)
+			break;
+
+	ret = nvkm_clk_ustate(base, pstates[i].pstate, 0);
+	ret |= nvkm_clk_ustate(base, pstates[i].pstate, 1);
+	if (ret) {
+		nvkm_error(subdev, "cannot update clock\n");
+		return ret;
+	}
+
+	*freq = pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV;
+
+	return 0;
+}
+
+static int gk20a_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct nouveau_drm *drm = dev_get_drvdata(dev);
+	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
+	struct nvkm_clk *base = nvkm_clk(subdev);
+
+	*freq = nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;
+
+	return 0;
+}
+
+static void gk20a_devfreq_reset(struct gk20a_devfreq *gdevfreq)
+{
+	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
+	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
+	gk20a_pmu_clear_idle_intr_status(gdevfreq);
+
+	gdevfreq->busy_time = 0;
+	gdevfreq->total_time = 0;
+	gdevfreq->time_last_update = ktime_get();
+}
+
+static int gk20a_devfreq_get_dev_status(struct device *dev,
+					struct devfreq_dev_status *status)
+{
+	struct nouveau_drm *drm = dev_get_drvdata(dev);
+	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
+
+	gk20a_devfreq_get_cur_freq(dev, &status->current_frequency);
+
+	gk20a_devfreq_update_utilization(gdevfreq);
+
+	status->busy_time = ktime_to_ns(gdevfreq->busy_time);
+	status->total_time = ktime_to_ns(gdevfreq->total_time);
+
+	gk20a_devfreq_reset(gdevfreq);
+
+	NV_DEBUG(drm, "busy %lu total %lu %lu %% freq %lu MHz\n",
+		 status->busy_time, status->total_time,
+		 status->busy_time / (status->total_time / 100),
+		 status->current_frequency / 1000 / 1000);
+
+	return 0;
+}
+
+static struct devfreq_dev_profile gk20a_devfreq_profile = {
+	.timer = DEVFREQ_TIMER_DELAYED,
+	.polling_ms = 50,
+	.target = gk20a_devfreq_target,
+	.get_cur_freq = gk20a_devfreq_get_cur_freq,
+	.get_dev_status = gk20a_devfreq_get_dev_status,
+};
+
+int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **gdevfreq)
+{
+	struct nvkm_device *device = base->subdev.device;
+	struct nouveau_drm *drm = dev_get_drvdata(device->dev);
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
+	struct nvkm_pstate *pstates = base->func->pstates;
+	int nr_pstates = base->func->nr_pstates;
+	struct gk20a_devfreq *new_gdevfreq;
+	int i;
+
+	new_gdevfreq = drmm_kzalloc(drm->dev, sizeof(struct gk20a_devfreq), GFP_KERNEL);
+	if (!new_gdevfreq)
+		return -ENOMEM;
+
+	new_gdevfreq->regs = tdev->regs;
+
+	for (i = 0; i < nr_pstates; i++)
+		dev_pm_opp_add(base->subdev.device->dev,
+			       pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV, 0);
+
+	gk20a_pmu_init_perfmon_counter(new_gdevfreq);
+	gk20a_devfreq_reset(new_gdevfreq);
+
+	gk20a_devfreq_profile.initial_freq =
+		nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;
+
+	new_gdevfreq->gov_data.upthreshold = 45;
+	new_gdevfreq->gov_data.downdifferential = 5;
+
+	new_gdevfreq->devfreq = devm_devfreq_add_device(device->dev,
+							&gk20a_devfreq_profile,
+							DEVFREQ_GOV_SIMPLE_ONDEMAND,
+							&new_gdevfreq->gov_data);
+	if (IS_ERR(new_gdevfreq->devfreq))
+		return PTR_ERR(new_gdevfreq->devfreq);
+
+	*gdevfreq = new_gdevfreq;
+
+	return 0;
+}
+
+int gk20a_devfreq_resume(struct device *dev)
+{
+	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
+
+	if (!gdevfreq || !gdevfreq->devfreq)
+		return 0;
+
+	return devfreq_resume_device(gdevfreq->devfreq);
+}
+
+int gk20a_devfreq_suspend(struct device *dev)
+{
+	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
+
+	if (!gdevfreq || !gdevfreq->devfreq)
+		return 0;
+
+	return devfreq_suspend_device(gdevfreq->devfreq);
+}
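
For reference, the load figure this patch reports to devfreq reduces to a fixed-point ratio: the busy and total PMU cycle counters are normalized to PMU_BUSY_CYCLES_NORM_MAX (1000), and the elapsed wall time is scaled by that ratio. Below is a minimal standalone sketch of that arithmetic; the counter readings and the 50 ms window are hypothetical sample values, and only the scaling and the overflow/wrap fallback mirror gk20a_devfreq_update_utilization() above.

	/* Standalone sketch of the busy/total normalization in
	 * gk20a_devfreq_update_utilization(). Sample inputs are hypothetical. */
	#include <stdint.h>
	#include <stdio.h>

	#define PMU_BUSY_CYCLES_NORM_MAX 1000u

	int main(void)
	{
		uint64_t busy_cycles = 350000;   /* hypothetical busy counter read */
		uint64_t total_cycles = 1000000; /* hypothetical total counter read */
		uint64_t total_time_us = 50000;  /* one 50 ms polling window */
		uint32_t norm;

		if (total_cycles == 0 || busy_cycles > total_cycles)
			norm = PMU_BUSY_CYCLES_NORM_MAX; /* wrap/overflow: report fully busy */
		else
			norm = (uint32_t)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX / total_cycles);

		/* devfreq's simple_ondemand governor compares busy_time/total_time
		 * against the patch's upthreshold (45) and downdifferential (5). */
		printf("norm %u/1000 -> busy %llu us of %llu us\n", norm,
		       (unsigned long long)(total_time_us * norm / PMU_BUSY_CYCLES_NORM_MAX),
		       (unsigned long long)total_time_us);
		return 0;
	}

With these sample counters the sketch prints "norm 350/1000 -> busy 17500 us of 50000 us", i.e. 35% load, which would sit between the governor's two thresholds and leave the frequency unchanged.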

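Similarly, gk20a_devfreq_target() above maps a requested frequency onto the driver's pstate table by taking the first (slowest) pstate whose GPC clock satisfies the request, falling back to the fastest. A standalone sketch under stated assumptions: the frequency table is invented, and GK20A_CLK_GPC_MDIV = 1000 is assumed here purely for illustration (the real value comes from gk20a.h).

	/* Sketch of the pstate selection loop in gk20a_devfreq_target().
	 * Table values and the MDIV constant are assumptions. */
	#include <stdio.h>

	#define GK20A_CLK_GPC_MDIV 1000u /* assumed value for illustration */

	int main(void)
	{
		/* hypothetical per-pstate nv_clk_src_gpc domain values */
		unsigned long gpc_domain[] = { 72000, 108000, 180000, 252000 };
		int nr_pstates = 4, i;
		unsigned long freq = 150000000; /* requested 150 MHz */

		/* first pstate fast enough, else the fastest one */
		for (i = 0; i < nr_pstates - 1; i++)
			if (gpc_domain[i] * GK20A_CLK_GPC_MDIV >= freq)
				break;

		freq = gpc_domain[i] * GK20A_CLK_GPC_MDIV; /* granted frequency */
		printf("selected pstate %d: %lu Hz\n", i, freq);
		return 0;
	}

For the 150 MHz request this picks index 2 (180 MHz), matching how the patch rounds a devfreq target up to the next available OPP and writes the granted rate back through *freq.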