Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_rps.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c  | 1130
1 file changed, 959 insertions(+), 171 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 06e9a8ed4e03..b01c837ab646 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -3,16 +3,30 @@
  * Copyright © 2019 Intel Corporation
  */

-#include <drm/i915_drm.h>
+#include <linux/string_helpers.h>
+
+#include <drm/intel/i915_drm.h>
+
+#include "display/intel_display_rps.h"
+#include "display/vlv_clock.h"
+#include "soc/intel_dram.h"

 #include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
 #include "intel_breadcrumbs.h"
 #include "intel_gt.h"
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_irq.h"
+#include "intel_gt_pm.h"
 #include "intel_gt_pm_irq.h"
+#include "intel_gt_print.h"
+#include "intel_gt_regs.h"
+#include "intel_mchbar_regs.h"
+#include "intel_pcode.h"
 #include "intel_rps.h"
-#include "intel_sideband.h"
+#include "vlv_iosf_sb.h"
 #include "../../../platform/x86/intel_ips.h"

 #define BUSY_MAX_EI 20u /* ms */
@@ -37,6 +51,20 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
 	return rps_to_gt(rps)->uncore;
 }

+static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	return &gt_to_guc(gt)->slpc;
+}
+
+static bool rps_uses_slpc(struct intel_rps *rps)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+
+	return intel_uc_uses_guc_slpc(&gt->uc);
+}
+
 static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
 {
 	return mask & ~rps->pm_intrmsk_mbz;
@@ -49,14 +77,15 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)

 static void rps_timer(struct timer_list *t)
 {
-	struct intel_rps *rps = from_timer(rps, t, timer);
+	struct intel_rps *rps = timer_container_of(rps, t, timer);
+	struct intel_gt *gt = rps_to_gt(rps);
 	struct intel_engine_cs *engine;
 	ktime_t dt, last, timestamp;
 	enum intel_engine_id id;
 	s64 max_busy[3] = {};

 	timestamp = 0;
-	for_each_engine(engine, rps_to_gt(rps), id) {
+	for_each_engine(engine, gt, id) {
 		s64 busy;
 		int i;

@@ -100,7 +129,7 @@ static void rps_timer(struct timer_list *t)
 			busy += div_u64(max_busy[i], 1 << i);
 		}
-		GT_TRACE(rps_to_gt(rps),
+		GT_TRACE(gt,
 			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
 			 busy, (int)div64_u64(100 * busy, dt),
 			 max_busy[0], max_busy[1], max_busy[2],
 			 rps->pm_interval);
@@ -110,12 +139,12 @@ static void rps_timer(struct timer_list *t)
 		if (100 * busy > rps->power.up_threshold * dt &&
 		    rps->cur_freq < rps->max_freq_softlimit) {
 			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
 			rps->pm_interval = 1;
-			schedule_work(&rps->work);
+			queue_work(gt->i915->unordered_wq, &rps->work);
 		} else if (100 * busy < rps->power.down_threshold * dt &&
 			   rps->cur_freq > rps->min_freq_softlimit) {
 			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
 			rps->pm_interval = 1;
-			schedule_work(&rps->work);
+			queue_work(gt->i915->unordered_wq, &rps->work);
 		} else {
 			rps->last_adj = 0;
 		}
@@ -135,7 +164,7 @@ static void rps_start_timer(struct intel_rps *rps)

 static void rps_stop_timer(struct intel_rps *rps)
 {
-	del_timer_sync(&rps->timer);
+	timer_delete_sync(&rps->timer);
 	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 	cancel_work_sync(&rps->work);
 }
@@ -167,14 +196,16 @@ static void rps_enable_interrupts(struct intel_rps *rps)
 {
 	struct intel_gt *gt = rps_to_gt(rps);

+	GEM_BUG_ON(rps_uses_slpc(rps));
+
 	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
 		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));

 	rps_reset_ei(rps);

-	spin_lock_irq(&gt->irq_lock);
+	spin_lock_irq(gt->irq_lock);
 	gen6_gt_pm_enable_irq(gt, rps->pm_events);
-	spin_unlock_irq(&gt->irq_lock);
+	spin_unlock_irq(gt->irq_lock);

 	intel_uncore_write(gt->uncore, GEN6_PMINTRMSK,
 			   rps_pm_mask(rps, rps->last_freq));
@@ -195,14 +226,14 @@ static void rps_reset_interrupts(struct intel_rps *rps)
 {
 	struct intel_gt *gt = rps_to_gt(rps);

-	spin_lock_irq(&gt->irq_lock);
+	spin_lock_irq(gt->irq_lock);
 	if (GRAPHICS_VER(gt->i915) >= 11)
 		gen11_rps_reset_interrupts(rps);
 	else
 		gen6_rps_reset_interrupts(rps);

 	rps->pm_iir = 0;
-	spin_unlock_irq(&gt->irq_lock);
+	spin_unlock_irq(gt->irq_lock);
 }

 static void rps_disable_interrupts(struct intel_rps *rps)
@@ -212,9 +243,9 @@ static void rps_disable_interrupts(struct intel_rps *rps)
 	intel_uncore_write(gt->uncore, GEN6_PMINTRMSK,
 			   rps_pm_sanitize_mask(rps, ~0u));

-	spin_lock_irq(&gt->irq_lock);
+	spin_lock_irq(gt->irq_lock);
 	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
-	spin_unlock_irq(&gt->irq_lock);
+	spin_unlock_irq(gt->irq_lock);

 	intel_synchronize_irq(gt->i915);
@@ -237,10 +268,10 @@ static const struct cparams {
 	u16 c;
 } cparams[] = {
 	{ 1, 1333, 301, 28664 },
-	{ 1, 1066, 294, 24460 },
+	{ 1, 1067, 294, 24460 },
 	{ 1, 800, 294, 25192 },
 	{ 0, 1333, 276, 27605 },
-	{ 0, 1066, 276, 27605 },
+	{ 0, 1067, 276, 27605 },
 	{ 0, 800, 231, 23784 },
 };
@@ -248,19 +279,24 @@ static void gen5_rps_init(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	struct intel_uncore *uncore = rps_to_uncore(rps);
+	unsigned int fsb_freq, mem_freq;
 	u8 fmax, fmin, fstart;
 	u32 rgvmodectl;
 	int c_m, i;

-	if (i915->fsb_freq <= 3200)
+	fsb_freq = intel_fsb_freq(i915);
+	mem_freq = intel_mem_freq(i915);
+
+	if (fsb_freq <= 3200000)
 		c_m = 0;
-	else if (i915->fsb_freq <= 4800)
+	else if (fsb_freq <= 4800000)
 		c_m = 1;
 	else
 		c_m = 2;

 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
-		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+		if (cparams[i].i == c_m &&
+		    cparams[i].t == DIV_ROUND_CLOSEST(mem_freq, 1000)) {
 			rps->ips.m = cparams[i].m;
 			rps->ips.c = cparams[i].c;
 			break;
@@ -408,7 +444,8 @@ static int __gen5_rps_set(struct intel_rps *rps, u8 val)

 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 	if (rgvswctl & MEMCTL_CMD_STS) {
-		DRM_DEBUG("gpu busy, RCS change rejected\n");
+		drm_dbg(&rps_to_i915(rps)->drm,
+			"gpu busy, RCS change rejected\n");
 		return -EBUSY; /* still busy with another command */
 	}
@@ -520,6 +557,7 @@ static unsigned int init_emon(struct intel_uncore *uncore)
 static bool gen5_rps_enable(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_display *display = i915->display;
 	struct intel_uncore *uncore = rps_to_uncore(rps);
 	u8 fstart, vstart;
 	u32 rgvmodectl;
@@ -577,9 +615,7 @@ static bool gen5_rps_enable(struct intel_rps *rps)
 	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
 	rps->ips.last_time2 = ktime_get_raw_ns();

-	spin_lock(&i915->irq_lock);
-	ilk_enable_display_irq(i915, DE_PCU_EVENT);
-	spin_unlock(&i915->irq_lock);
+	ilk_display_rps_enable(display);

 	spin_unlock_irq(&mchdev_lock);
@@ -591,21 +627,18 @@
 static void gen5_rps_disable(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_display *display = i915->display;
 	struct intel_uncore *uncore = rps_to_uncore(rps);
 	u16 rgvswctl;

 	spin_lock_irq(&mchdev_lock);

-	spin_lock(&i915->irq_lock);
-	ilk_disable_display_irq(i915, DE_PCU_EVENT);
-	spin_unlock(&i915->irq_lock);
+	ilk_display_rps_disable(display);

 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);

 	/* Ack interrupts, disable EFC interrupt */
-	intel_uncore_write(uncore, MEMINTREN,
-			   intel_uncore_read(uncore, MEMINTREN) &
-			   ~MEMINT_EVAL_CHG_EN);
+	intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
 	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);

 	/* Go back to the starting frequency */
@@ -647,7 +680,6 @@
 static void rps_set_power(struct intel_rps *rps, int new_power)
 {
 	struct intel_gt *gt = rps_to_gt(rps);
 	struct intel_uncore *uncore = gt->uncore;
-	u32 threshold_up = 0, threshold_down = 0; /* in % */
 	u32 ei_up = 0, ei_down = 0;

 	lockdep_assert_held(&rps->power.mutex);
@@ -655,9 +687,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
 	if (new_power == rps->power.mode)
 		return;

-	threshold_up = 95;
-	threshold_down = 85;
-
 	/* Note the units here are not exactly 1us, but 1280ns. */
 	switch (new_power) {
 	case LOW_POWER:
@@ -684,17 +713,22 @@ static void rps_set_power(struct intel_rps *rps, int new_power)

 	GT_TRACE(gt,
 		 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
-		 new_power, threshold_up, ei_up, threshold_down, ei_down);
+		 new_power,
+		 rps->power.up_threshold, ei_up,
+		 rps->power.down_threshold, ei_down);

 	set(uncore, GEN6_RP_UP_EI,
 	    intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
 	set(uncore, GEN6_RP_UP_THRESHOLD,
-	    intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
+	    intel_gt_ns_to_pm_interval(gt,
+				       ei_up * rps->power.up_threshold * 10));

 	set(uncore, GEN6_RP_DOWN_EI,
 	    intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
 	set(uncore, GEN6_RP_DOWN_THRESHOLD,
-	    intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
+	    intel_gt_ns_to_pm_interval(gt,
+				       ei_down *
+				       rps->power.down_threshold * 10));

 	set(uncore, GEN6_RP_CONTROL,
 	    (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
@@ -706,8 +740,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)

 skip_hw_write:
 	rps->power.mode = new_power;
-	rps->power.up_threshold = threshold_up;
-	rps->power.down_threshold = threshold_down;
 }

 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
@@ -752,7 +784,8 @@
 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
 {
-	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
+	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n",
+		 str_yes_no(interactive));

 	mutex_lock(&rps->power.mutex);
 	if (interactive) {
@@ -771,6 +804,8 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 swreq;

+	GEM_BUG_ON(rps_uses_slpc(rps));
+
 	if (GRAPHICS_VER(i915) >= 9)
 		swreq = GEN9_FREQUENCY(val);
 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
@@ -792,9 +827,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	int err;

-	vlv_punit_get(i915);
-	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
-	vlv_punit_put(i915);
+	vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+	err = vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_REQ, val);
+	vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));

 	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
 		 val, intel_gpu_freq(rps, val));
@@ -861,7 +896,8 @@
 void intel_rps_park(struct intel_rps *rps)
 {
 	int adj;

-	GEM_BUG_ON(atomic_read(&rps->num_waiters));
+	if (!intel_rps_is_enabled(rps))
+		return;

 	if (!intel_rps_clear_active(rps))
 		return;
@@ -916,15 +952,110 @@
 	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 }

+u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc;
+
+	if (rps_uses_slpc(rps)) {
+		slpc = rps_to_slpc(rps);
+
+		return slpc->boost_freq;
+	} else {
+		return intel_gpu_freq(rps, rps->boost_freq);
+	}
+}
+
+static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
+{
+	bool boost = false;
+
+	/* Validate against (static) hardware limits */
+	val = intel_freq_opcode(rps, val);
+	if (val < rps->min_freq || val > rps->max_freq)
+		return -EINVAL;
+
+	mutex_lock(&rps->lock);
+	if (val != rps->boost_freq) {
+		rps->boost_freq = val;
+		boost = atomic_read(&rps->num_waiters);
+	}
+	mutex_unlock(&rps->lock);
+	if (boost)
+		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
+
+	return 0;
+}
+
+int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
+{
+	struct intel_guc_slpc *slpc;
+
+	if (rps_uses_slpc(rps)) {
+		slpc = rps_to_slpc(rps);
+
+		return intel_guc_slpc_set_boost_freq(slpc, freq);
+	} else {
+		return rps_set_boost_freq(rps, freq);
+	}
+}
+
+void intel_rps_dec_waiters(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc;
+
+	if (rps_uses_slpc(rps)) {
+		slpc = rps_to_slpc(rps);
+
+		/* Don't decrement num_waiters for req where increment was skipped */
+		if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
+			return;
+
+		intel_guc_slpc_dec_waiters(slpc);
+	} else {
+		atomic_dec(&rps->num_waiters);
+	}
+}
+
 void intel_rps_boost(struct i915_request *rq)
 {
+	struct intel_guc_slpc *slpc;
+
 	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
 		return;

+	/* Waitboost is not needed for contexts marked with a Freq hint */
+	if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
+		return;
+
 	/* Serializes with i915_request_retire() */
 	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
 		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;

+		if (rps_uses_slpc(rps)) {
+			slpc = rps_to_slpc(rps);
+
+			/* Waitboost should not be done with power saving profile */
+			if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
+				return;
+
+			/* Return if old value is non zero */
+			if (!atomic_fetch_inc(&slpc->num_waiters)) {
+				/*
+				 * Skip queuing boost work if frequency is already boosted,
+				 * but still increment num_waiters.
+				 */
+				if (slpc->min_freq_softlimit >= slpc->boost_freq)
+					return;
+
+				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+					 rq->fence.context, rq->fence.seqno);
+				queue_work(rps_to_gt(rps)->i915->unordered_wq,
+					   &slpc->boost_work);
+			}
+
+			return;
+		}
+
 		if (atomic_fetch_inc(&rps->num_waiters))
 			return;
@@ -935,7 +1066,7 @@
 			 rq->fence.context, rq->fence.seqno);

 		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
-			schedule_work(&rps->work);
+			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);

 		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
 	}
@@ -972,56 +1103,118 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
 	return 0;
 }

-static void gen6_rps_init(struct intel_rps *rps)
+static u32 intel_rps_read_state_cap(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	struct intel_uncore *uncore = rps_to_uncore(rps);

-	/* All of these values are in units of 50MHz */
+	if (IS_GEN9_LP(i915))
+		return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+	else
+		return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+}
+
+static void
+mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
+				intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
+				intel_uncore_read(uncore, MTL_RP_STATE_CAP);
+	u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
+			intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
+			intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
+
+	/* MTL values are in units of 16.67 MHz */
+	caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
+	caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
+	caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
+}
+
+static void
+__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	u32 rp_state_cap;
+
+	rp_state_cap = intel_rps_read_state_cap(rps);

 	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
 	if (IS_GEN9_LP(i915)) {
-		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
-
-		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >>  0) & 0xff;
+		caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+		caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		caps->min_freq = (rp_state_cap >>  0) & 0xff;
 	} else {
-		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+		caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
+		if (GRAPHICS_VER(i915) >= 10)
+			caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
+						       intel_uncore_read(to_gt(i915)->uncore,
+									 GEN10_FREQ_INFO_REC));
+		else
+			caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		caps->min_freq = (rp_state_cap >> 16) & 0xff;
+	}

-		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >> 16) & 0xff;
+	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
+		/*
+		 * In this case rp_state_cap register reports frequencies in
+		 * units of 50 MHz. Convert these to the actual "hw unit", i.e.
+		 * units of 16.67 MHz
+		 */
+		caps->rp0_freq *= GEN9_FREQ_SCALER;
+		caps->rp1_freq *= GEN9_FREQ_SCALER;
+		caps->min_freq *= GEN9_FREQ_SCALER;
 	}
+}
+
+/**
+ * gen6_rps_get_freq_caps - Get freq caps exposed by HW
+ * @rps: the intel_rps structure
+ * @caps: returned freq caps
+ *
+ * Returned "caps" frequencies should be converted to MHz using
+ * intel_gpu_freq()
+ */
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		return mtl_get_freq_caps(rps, caps);
+	else
+		return __gen6_rps_get_freq_caps(rps, caps);
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	struct intel_rps_freq_caps caps;
+
+	gen6_rps_get_freq_caps(rps, &caps);
+	rps->rp0_freq = caps.rp0_freq;
+	rps->rp1_freq = caps.rp1_freq;
+	rps->min_freq = caps.min_freq;

 	/* hw_max = RP0 until we check for overclocking */
 	rps->max_freq = rps->rp0_freq;

 	rps->efficient_freq = rps->rp1_freq;
 	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
-	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) {
+	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
 		u32 ddcc_status = 0;
+		u32 mult = 1;

-		if (sandybridge_pcode_read(i915,
-					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
-					   &ddcc_status, NULL) == 0)
+		if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
+			mult = GEN9_FREQ_SCALER;
+		if (snb_pcode_read(rps_to_gt(rps)->uncore,
+				   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+				   &ddcc_status, NULL) == 0)
 			rps->efficient_freq =
-				clamp_t(u8,
-					(ddcc_status >> 8) & 0xff,
+				clamp_t(u32,
+					((ddcc_status >> 8) & 0xff) * mult,
 					rps->min_freq,
 					rps->max_freq);
 	}
-
-	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) {
-		/* Store the frequency values in 16.66 MHZ units, which is
-		 * the natural hardware unit for SKL
-		 */
-		rps->rp0_freq *= GEN9_FREQ_SCALER;
-		rps->rp1_freq *= GEN9_FREQ_SCALER;
-		rps->min_freq *= GEN9_FREQ_SCALER;
-		rps->max_freq *= GEN9_FREQ_SCALER;
-		rps->efficient_freq *= GEN9_FREQ_SCALER;
-	}
 }

 static bool rps_reset(struct intel_rps *rps)
@@ -1094,7 +1287,7 @@ static int chv_rps_max_freq(struct intel_rps *rps)
 	struct intel_gt *gt = rps_to_gt(rps);
 	u32 val;

-	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMAX_AT_VMAX_FUSE);

 	switch (gt->info.sseu.eu_total) {
 	case 8:
@@ -1121,7 +1314,7 @@ static int chv_rps_rpe_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val;

-	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_GPU_DUTYCYCLE_REG);
 	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;

 	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
@@ -1132,7 +1325,7 @@ static int chv_rps_guar_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val;

-	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMAX_AT_VMAX_FUSE);

 	return val & FB_GFX_FREQ_FUSE_MASK;
 }
@@ -1142,7 +1335,7 @@ static u32 chv_rps_min_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val;

-	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMIN_AT_VMIN_FUSE);
 	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;

 	return val & FB_GFX_FREQ_FUSE_MASK;
@@ -1176,20 +1369,21 @@
 	      GEN6_PM_RP_DOWN_TIMEOUT);

 	/* Setting Fixed Bias */
-	vlv_punit_get(i915);
+	vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));

 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
-	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+	vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, VLV_TURBO_SOC_OVERRIDE, val);

-	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);

-	vlv_punit_put(i915);
+	vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));

 	/* RPS code assumes GPLL is used */
 	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
 		      "GPLL not enabled\n");

-	drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
+		str_yes_no(val & GPLLENABLE));
 	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);

 	return rps_reset(rps);
@@ -1200,7 +1394,7 @@ static int vlv_rps_guar_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val, rp1;

-	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FREQ_FUSE);

 	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
 	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
@@ -1213,7 +1407,7 @@ static int vlv_rps_max_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val, rp0;

-	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FREQ_FUSE);

 	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
 	/* Clamp to max */
@@ -1227,9 +1421,9 @@ static int vlv_rps_rpe_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val, rpe;

-	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
 	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
-	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
 	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;

 	return rpe;
@@ -1240,7 +1434,7 @@ static int vlv_rps_min_freq(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 val;

-	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_LFM) & 0xff;
 	/*
 	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
 	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
@@ -1276,21 +1470,22 @@
 	/* WaGsvRC0ResidencyMethod:vlv */
 	rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;

-	vlv_punit_get(i915);
+	vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));

 	/* Setting Fixed Bias */
 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
-	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+	vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, VLV_TURBO_SOC_OVERRIDE, val);

-	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+	val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);

-	vlv_punit_put(i915);
+	vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));

 	/* RPS code assumes GPLL is used */
 	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
 		      "GPLL not enabled\n");

-	drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
+		str_yes_no(val & GPLLENABLE));
 	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);

 	return rps_reset(rps);
@@ -1356,6 +1551,9 @@ void intel_rps_enable(struct intel_rps *rps)
 	if (!HAS_RPS(i915))
 		return;

+	if (rps_uses_slpc(rps))
+		return;
+
 	intel_gt_check_clock_frequency(rps_to_gt(rps));

 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
@@ -1380,10 +1578,12 @@ void intel_rps_enable(struct intel_rps *rps)
 		return;

 	GT_TRACE(rps_to_gt(rps),
-		 "min:%x, max:%x, freq:[%d, %d]\n",
+		 "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n",
 		 rps->min_freq, rps->max_freq,
 		 intel_gpu_freq(rps, rps->min_freq),
-		 intel_gpu_freq(rps, rps->max_freq));
+		 intel_gpu_freq(rps, rps->max_freq),
+		 rps->power.up_threshold,
+		 rps->power.down_threshold);

 	GEM_BUG_ON(rps->max_freq < rps->min_freq);
 	GEM_BUG_ON(rps->idle_freq > rps->max_freq);
@@ -1393,7 +1593,7 @@ void intel_rps_enable(struct intel_rps *rps)

 	if (has_busy_stats(rps))
 		intel_rps_set_timer(rps);
-	else if (GRAPHICS_VER(i915) >= 6)
+	else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
 		intel_rps_set_interrupts(rps);
 	else
 		/* Ironlake currently uses intel_ips.ko */ {}
@@ -1410,6 +1610,9 @@
 void intel_rps_disable(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);

+	if (!intel_rps_is_enabled(rps))
+		return;
+
 	intel_rps_clear_enabled(rps);
 	intel_rps_clear_interrupts(rps);
 	intel_rps_clear_timer(rps);
@@ -1487,10 +1690,7 @@
 static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);

-	rps->gpll_ref_freq =
-		vlv_get_cck_clock(i915, "GPLL ref",
-				  CCK_GPLL_CLOCK_CONTROL,
-				  i915->czclk_freq);
+	rps->gpll_ref_freq = vlv_clock_get_gpll(&i915->drm);

 	drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
 		rps->gpll_ref_freq);
@@ -1499,30 +1699,14 @@
 static void vlv_rps_init(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
-	u32 val;

-	vlv_iosf_sb_get(i915,
+	vlv_init_gpll_ref_freq(rps);
+
+	vlv_iosf_sb_get(&i915->drm,
 			BIT(VLV_IOSF_SB_PUNIT) |
 			BIT(VLV_IOSF_SB_NC) |
 			BIT(VLV_IOSF_SB_CCK));

-	vlv_init_gpll_ref_freq(rps);
-
-	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
-	switch ((val >> 6) & 3) {
-	case 0:
-	case 1:
-		i915->mem_freq = 800;
-		break;
-	case 2:
-		i915->mem_freq = 1066;
-		break;
-	case 3:
-		i915->mem_freq = 1333;
-		break;
-	}
-	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
-
 	rps->max_freq = vlv_rps_max_freq(rps);
 	rps->rp0_freq = rps->max_freq;
 	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
@@ -1540,7 +1724,7 @@
 	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
 		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);

-	vlv_iosf_sb_put(i915,
+	vlv_iosf_sb_put(&i915->drm,
 			BIT(VLV_IOSF_SB_PUNIT) |
 			BIT(VLV_IOSF_SB_NC) |
 			BIT(VLV_IOSF_SB_CCK));
@@ -1549,27 +1733,14 @@
 static void chv_rps_init(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
-	u32 val;

-	vlv_iosf_sb_get(i915,
+	vlv_init_gpll_ref_freq(rps);
+
+	vlv_iosf_sb_get(&i915->drm,
 			BIT(VLV_IOSF_SB_PUNIT) |
 			BIT(VLV_IOSF_SB_NC) |
 			BIT(VLV_IOSF_SB_CCK));

-	vlv_init_gpll_ref_freq(rps);
-
-	val = vlv_cck_read(i915, CCK_FUSE_REG);
-
-	switch ((val >> 2) & 0x7) {
-	case 3:
-		i915->mem_freq = 2000;
-		break;
-	default:
-		i915->mem_freq = 1600;
-		break;
-	}
-	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
-
 	rps->max_freq = chv_rps_max_freq(rps);
 	rps->rp0_freq = rps->max_freq;
 	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
@@ -1587,7 +1758,7 @@
 	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
 		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);

-	vlv_iosf_sb_put(i915,
+	vlv_iosf_sb_put(&i915->drm,
 			BIT(VLV_IOSF_SB_PUNIT) |
 			BIT(VLV_IOSF_SB_NC) |
 			BIT(VLV_IOSF_SB_CCK));
@@ -1606,6 +1777,7 @@ static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)

 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
 {
+	struct drm_i915_private *i915 = rps_to_i915(rps);
 	struct intel_uncore *uncore = rps_to_uncore(rps);
 	const struct intel_rps_ei *prev = &rps->ei;
 	struct intel_rps_ei now;
@@ -1622,7 +1794,7 @@

 	time = ktime_us_delta(now.ktime, prev->ktime);

-	time *= rps_to_i915(rps)->czclk_freq;
+	time *= vlv_clock_get_czclk(&i915->drm);

 	/* Workload can be split between render + media,
 	 * e.g. SwapBuffers being blitted in X after being rendered in
@@ -1653,10 +1825,10 @@ static void rps_work(struct work_struct *work)
 	int new_freq, adj, min, max;
 	u32 pm_iir = 0;

-	spin_lock_irq(&gt->irq_lock);
+	spin_lock_irq(gt->irq_lock);
 	pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
 	client_boost = atomic_read(&rps->num_waiters);
-	spin_unlock_irq(&gt->irq_lock);
+	spin_unlock_irq(gt->irq_lock);

 	/* Make sure we didn't queue anything we're not going to process. */
 	if (!pm_iir && !client_boost)
@@ -1679,7 +1851,7 @@

 	GT_TRACE(gt,
 		 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
-		 pm_iir, yesno(client_boost),
+		 pm_iir, str_yes_no(client_boost),
 		 adj, new_freq, min, max);

 	if (client_boost && new_freq < rps->boost_freq) {
@@ -1729,9 +1901,9 @@
 	mutex_unlock(&rps->lock);

 out:
-	spin_lock_irq(&gt->irq_lock);
+	spin_lock_irq(gt->irq_lock);
 	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
-	spin_unlock_irq(&gt->irq_lock);
+	spin_unlock_irq(gt->irq_lock);
 }

 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
@@ -1739,7 +1911,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 	struct intel_gt *gt = rps_to_gt(rps);
 	const u32 events = rps->pm_events & pm_iir;

-	lockdep_assert_held(&gt->irq_lock);
+	lockdep_assert_held(gt->irq_lock);

 	if (unlikely(!events))
 		return;
@@ -1749,7 +1921,7 @@
 	gen6_gt_pm_mask_irq(gt, events);

 	rps->pm_iir |= events;
-	schedule_work(&rps->work);
+	queue_work(gt->i915->unordered_wq, &rps->work);
 }

 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
@@ -1759,15 +1931,15 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)

 	events = pm_iir & rps->pm_events;
 	if (events) {
-		spin_lock(&gt->irq_lock);
+		spin_lock(gt->irq_lock);

 		GT_TRACE(gt, "irq events:%x\n", events);

 		gen6_gt_pm_mask_irq(gt, events);
 		rps->pm_iir |= events;

-		schedule_work(&rps->work);
-		spin_unlock(&gt->irq_lock);
+		queue_work(gt->i915->unordered_wq, &rps->work);
+		spin_unlock(gt->irq_lock);
 	}

 	if (GRAPHICS_VER(gt->i915) >= 8)
@@ -1777,7 +1949,8 @@
 		intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);

 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
-		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
+		drm_dbg(&rps_to_i915(rps)->drm,
+			"Command parser error, pm_iir 0x%08x\n", pm_iir);
 }

 void gen5_rps_irq_handler(struct intel_rps *rps)
@@ -1829,6 +2002,9 @@
 void intel_rps_init(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);

+	if (rps_uses_slpc(rps))
+		return;
+
 	if (IS_CHERRYVIEW(i915))
 		chv_rps_init(rps);
 	else if (IS_VALLEYVIEW(i915))
@@ -1840,14 +2016,15 @@

 	/* Derive initial user preferences/limits from the hardware limits */
 	rps->max_freq_softlimit = rps->max_freq;
+	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
 	rps->min_freq_softlimit = rps->min_freq;
+	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;

 	/* After setting max-softlimit, find the overclock max freq */
 	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
 		u32 params = 0;

-		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
-				       &params, NULL);
+		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
 		if (params & BIT(31)) { /* OC supported */
 			drm_dbg(&i915->drm,
 				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
@@ -1857,6 +2034,12 @@
 		}
 	}

+	/* Set default thresholds in % */
+	rps->power.up_threshold = 95;
+	rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold;
+	rps->power.down_threshold = 85;
+	rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold;
+
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq;
 	rps->idle_freq = rps->min_freq;
@@ -1877,53 +2060,90 @@

 	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
 		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+
+	/* GuC needs ARAT expired interrupt unmasked */
+	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
+		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
 }

 void intel_rps_sanitize(struct intel_rps *rps)
 {
+	if (rps_uses_slpc(rps))
+		return;
+
 	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
 		rps_disable_interrupts(rps);
 }

-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+u32 intel_rps_read_rpstat(struct intel_rps *rps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	i915_reg_t rpstat;
+
+	rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+	return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+}
+
+static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	u32 cagf;

-	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
-		cagf = (rpstat >> 8) & 0xff;
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
+	else if (GRAPHICS_VER(i915) >= 12)
+		cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
+	else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		cagf = REG_FIELD_GET(RPE_MASK, rpstat);
 	else if (GRAPHICS_VER(i915) >= 9)
-		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+		cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+		cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
 	else if (GRAPHICS_VER(i915) >= 6)
-		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+		cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
 	else
-		cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
-					MEMSTAT_PSTATE_SHIFT);
+		cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat));

 	return cagf;
 }

-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	struct intel_uncore *uncore = rps_to_uncore(rps);
+	i915_reg_t r = INVALID_MMIO_REG;
 	u32 freq;

-	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
-		vlv_punit_get(i915);
-		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
-		vlv_punit_put(i915);
+	/*
+	 * For Gen12+ reading freq from HW does not need a forcewake and
+	 * registers will return 0 freq when GT is in RC6
+	 */
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+		r = MTL_MIRROR_TARGET_WP1;
+	} else if (GRAPHICS_VER(i915) >= 12) {
+		r = GEN12_RPSTAT1;
+	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+		freq = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
+		vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
 	} else if (GRAPHICS_VER(i915) >= 6) {
-		freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+		r = GEN6_RPSTAT1;
 	} else {
-		freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+		r = MEMSTAT_ILK;
 	}

+	if (i915_mmio_reg_valid(r))
+		freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
+
 	return intel_rps_get_cagf(rps, freq);
 }

+static u32 read_cagf(struct intel_rps *rps)
+{
+	return __read_cagf(rps, true);
+}
+
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
 {
 	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -1936,11 +2156,578 @@
 	return freq;
 }

+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+	return intel_gpu_freq(rps, __read_cagf(rps, false));
+}
+
+static u32 intel_rps_read_punit_req(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+	intel_wakeref_t wakeref;
+	u32 freq = 0;
+
+	with_intel_runtime_pm_if_in_use(rpm, wakeref)
+		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+
+	return freq;
+}
+
+static u32 intel_rps_get_req(u32 pureq)
+{
+	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
+
+	return req;
+}
+
+u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
+{
+	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
+
+	return intel_gpu_freq(rps, freq);
+}
+
+u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
+{
+	if (rps_uses_slpc(rps))
+		return intel_rps_read_punit_req_frequency(rps);
+	else
+		return intel_gpu_freq(rps, rps->cur_freq);
+}
+
+u32 intel_rps_get_max_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return slpc->max_freq_softlimit;
+	else
+		return intel_gpu_freq(rps, rps->max_freq_softlimit);
+}
+
+/**
+ * intel_rps_get_max_raw_freq - returns the max frequency in some raw format.
+ * @rps: the intel_rps structure
+ *
+ * Returns the max frequency in a raw format. In newer platforms raw is in
+ * units of 50 MHz.
+ */
+u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+	u32 freq;
+
+	if (rps_uses_slpc(rps)) {
+		return DIV_ROUND_CLOSEST(slpc->rp0_freq,
+					 GT_FREQUENCY_MULTIPLIER);
+	} else {
+		freq = rps->max_freq;
+		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
+			/* Convert GT frequency to 50 MHz units */
+			freq /= GEN9_FREQ_SCALER;
+		}
+		return freq;
+	}
+}
+
+u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return slpc->rp0_freq;
+	else
+		return intel_gpu_freq(rps, rps->rp0_freq);
+}
+
+u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return slpc->rp1_freq;
+	else
+		return intel_gpu_freq(rps, rps->rp1_freq);
+}
+
+u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return slpc->min_freq;
+	else
+		return intel_gpu_freq(rps, rps->min_freq);
+}
+
+static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_rps_freq_caps caps;
+	u32 rp_state_limits;
+	u32 gt_perf_status;
+	u32 rpmodectl, rpinclimit, rpdeclimit;
+	u32 rpstat, cagf, reqf;
+	u32 rpcurupei, rpcurup, rpprevup;
+	u32 rpcurdownei, rpcurdown, rpprevdown;
+	u32 rpupei, rpupt, rpdownei, rpdownt;
+	u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+
+	rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+	gen6_rps_get_freq_caps(rps, &caps);
+	if (IS_GEN9_LP(i915))
+		gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+	else
+		gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+
+	/* RPSTAT1 is in the GT power well */
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+	reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+	if (GRAPHICS_VER(i915) >= 9) {
+		reqf >>= 23;
+	} else {
+		reqf &= ~GEN6_TURBO_DISABLE;
+		if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+			reqf >>= 24;
+		else
+			reqf >>= 25;
+	}
+	reqf = intel_gpu_freq(rps, reqf);
+
+	rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+	rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+	rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+	rpstat = intel_rps_read_rpstat(rps);
+	rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+	rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+	rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+	rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+	rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+	rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+	rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+	rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+	rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+	rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+	cagf = intel_rps_read_actual_frequency(rps);
+
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+	if (GRAPHICS_VER(i915) >= 11) {
+		pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+		pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+		/*
+		 * The equivalent to the PM ISR & IIR cannot be read
+		 * without affecting the current state of the system
+		 */
+		pm_isr = 0;
+		pm_iir = 0;
+	} else if (GRAPHICS_VER(i915) >= 8) {
+		pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+		pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+		pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+		pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+	} else {
+		pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+		pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+		pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+		pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+	}
+	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+	drm_printf(p, "Video Turbo Mode: %s\n",
+		   str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
+	drm_printf(p, "HW control enabled: %s\n",
+		   str_yes_no(rpmodectl & GEN6_RP_ENABLE));
+	drm_printf(p, "SW control enabled: %s\n",
+		   str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
+
+	drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+		   pm_ier, pm_imr, pm_mask);
+	if (GRAPHICS_VER(i915) <= 10)
+		drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
+			   pm_isr, pm_iir);
+	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+		   rps->pm_intrmsk_mbz);
+	drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+	drm_printf(p, "Render p-state ratio: %d\n",
+		   (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+	drm_printf(p, "Render p-state VID: %d\n",
+		   gt_perf_status & 0xff);
+	drm_printf(p, "Render p-state limit: %d\n",
+		   rp_state_limits & 0xff);
+	drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
+	drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
+	drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+	drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+	drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
+	drm_printf(p, "CAGF: %dMHz\n", cagf);
+	drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
+		   rpcurupei,
+		   intel_gt_pm_interval_to_ns(gt, rpcurupei));
+	drm_printf(p, "RP CUR UP: %d (%lldns)\n",
+		   rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+	drm_printf(p, "RP PREV UP: %d (%lldns)\n",
+		   rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
+	drm_printf(p, "Up threshold: %d%%\n",
+		   rps->power.up_threshold);
+	drm_printf(p, "RP UP EI: %d (%lldns)\n",
+		   rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+	drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
+		   rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+	drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
+		   rpcurdownei,
+		   intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+	drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
+		   rpcurdown,
+		   intel_gt_pm_interval_to_ns(gt, rpcurdown));
+	drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
+		   rpprevdown,
+		   intel_gt_pm_interval_to_ns(gt, rpprevdown));
+	drm_printf(p, "Down threshold: %d%%\n",
+		   rps->power.down_threshold);
+	drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
+		   rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+	drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
+		   rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
+
+	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+		   intel_gpu_freq(rps, caps.min_freq));
+	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+		   intel_gpu_freq(rps, caps.rp1_freq));
+	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+		   intel_gpu_freq(rps, caps.rp0_freq));
+	drm_printf(p, "Max overclocked frequency: %dMHz\n",
+		   intel_gpu_freq(rps, rps->max_freq));
+
+	drm_printf(p, "Current freq: %d MHz\n",
+		   intel_gpu_freq(rps, rps->cur_freq));
+	drm_printf(p, "Actual freq: %d MHz\n", cagf);
+	drm_printf(p, "Idle freq: %d MHz\n",
+		   intel_gpu_freq(rps, rps->idle_freq));
+	drm_printf(p, "Min freq: %d MHz\n",
+		   intel_gpu_freq(rps, rps->min_freq));
+	drm_printf(p, "Boost freq: %d MHz\n",
+		   intel_gpu_freq(rps, rps->boost_freq));
+	drm_printf(p, "Max freq: %d MHz\n",
+		   intel_gpu_freq(rps, rps->max_freq));
+	drm_printf(p,
+		   "efficient (RPe) frequency: %d MHz\n",
+		   intel_gpu_freq(rps, rps->efficient_freq));
+}
+
+static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_rps_freq_caps caps;
+	u32 pm_mask;
+
+	gen6_rps_get_freq_caps(rps, &caps);
+	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+	drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
+	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+		   rps->pm_intrmsk_mbz);
+	drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
+	drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
+	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+		   intel_gpu_freq(rps, caps.min_freq));
+	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+		   intel_gpu_freq(rps, caps.rp1_freq));
+	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+		   intel_gpu_freq(rps, caps.rp0_freq));
+	drm_printf(p, "Current freq: %d MHz\n",
+		   intel_rps_get_requested_frequency(rps));
+	drm_printf(p, "Actual freq: %d MHz\n",
+		   intel_rps_read_actual_frequency(rps));
+	drm_printf(p, "Min freq: %d MHz\n",
+		   intel_rps_get_min_frequency(rps));
+	drm_printf(p, "Boost freq: %d MHz\n",
+		   intel_rps_get_boost_frequency(rps));
+	drm_printf(p, "Max freq: %d MHz\n",
+		   intel_rps_get_max_frequency(rps));
+	drm_printf(p,
+		   "efficient (RPe) frequency: %d MHz\n",
+		   intel_gpu_freq(rps, caps.rp1_freq));
+}
+
+void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+	if (rps_uses_slpc(rps))
+		return slpc_frequency_dump(rps, p);
+	else
+		return rps_frequency_dump(rps, p);
+}
+
+static int set_max_freq(struct intel_rps *rps, u32 val)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+	int ret = 0;
+
+	mutex_lock(&rps->lock);
+
+	val = intel_freq_opcode(rps, val);
+	if (val < rps->min_freq ||
+	    val > rps->max_freq ||
+	    val < rps->min_freq_softlimit) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	if (val > rps->rp0_freq)
+		drm_dbg(&i915->drm, "User requested overclocking to %d\n",
+			intel_gpu_freq(rps, val));
+
+	rps->max_freq_softlimit = val;
+
+	val = clamp_t(int, rps->cur_freq,
+		      rps->min_freq_softlimit,
+		      rps->max_freq_softlimit);
+
+	/*
+	 * We still need *_set_rps to process the new max_delay and
+	 * update the interrupt limits and PMINTRMSK even though
+	 * frequency request may be unchanged.
+	 */
+	intel_rps_set(rps, val);
+
+unlock:
+	mutex_unlock(&rps->lock);
+
+	return ret;
+}
+
+int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return intel_guc_slpc_set_max_freq(slpc, val);
+	else
+		return set_max_freq(rps, val);
+}
+
+u32 intel_rps_get_min_frequency(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return slpc->min_freq_softlimit;
+	else
+		return intel_gpu_freq(rps, rps->min_freq_softlimit);
+}
+
+/**
+ * intel_rps_get_min_raw_freq - returns the min frequency in some raw format.
+ * @rps: the intel_rps structure
+ *
+ * Returns the min frequency in a raw format. In newer platforms raw is in
+ * units of 50 MHz.
+ */
+u32 intel_rps_get_min_raw_freq(struct intel_rps *rps)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+	u32 freq;
+
+	if (rps_uses_slpc(rps)) {
+		return DIV_ROUND_CLOSEST(slpc->min_freq,
+					 GT_FREQUENCY_MULTIPLIER);
+	} else {
+		freq = rps->min_freq;
+		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
+			/* Convert GT frequency to 50 MHz units */
+			freq /= GEN9_FREQ_SCALER;
+		}
+		return freq;
+	}
+}
+
+static int set_min_freq(struct intel_rps *rps, u32 val)
+{
+	int ret = 0;
+
+	mutex_lock(&rps->lock);
+
+	val = intel_freq_opcode(rps, val);
+	if (val < rps->min_freq ||
+	    val > rps->max_freq ||
+	    val > rps->max_freq_softlimit) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	rps->min_freq_softlimit = val;
+
+	val = clamp_t(int, rps->cur_freq,
+		      rps->min_freq_softlimit,
+		      rps->max_freq_softlimit);
+
+	/*
+	 * We still need *_set_rps to process the new min_delay and
+	 * update the interrupt limits and PMINTRMSK even though
+	 * frequency request may be unchanged.
+	 */
+	intel_rps_set(rps, val);
+
+unlock:
+	mutex_unlock(&rps->lock);
+
+	return ret;
+}
+
+int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
+{
+	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+	if (rps_uses_slpc(rps))
+		return intel_guc_slpc_set_min_freq(slpc, val);
+	else
+		return set_min_freq(rps, val);
+}
+
+u8 intel_rps_get_up_threshold(struct intel_rps *rps)
+{
+	return rps->power.up_threshold;
+}
+
+static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val)
+{
+	int ret;
+
+	if (val > 100)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&rps->lock);
+	if (ret)
+		return ret;
+
+	if (*threshold == val)
+		goto out_unlock;
+
+	*threshold = val;
+
+	/* Force reset. */
+	rps->last_freq = -1;
+	mutex_lock(&rps->power.mutex);
+	rps->power.mode = -1;
+	mutex_unlock(&rps->power.mutex);
+
+	intel_rps_set(rps, clamp(rps->cur_freq,
+				 rps->min_freq_softlimit,
+				 rps->max_freq_softlimit));
+
+out_unlock:
+	mutex_unlock(&rps->lock);
+
+	return ret;
+}
+
+int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold)
+{
+	return rps_set_threshold(rps, &rps->power.up_threshold, threshold);
+}
+
+u8 intel_rps_get_down_threshold(struct intel_rps *rps)
+{
+	return rps->power.down_threshold;
+}
+
+int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold)
+{
+	return rps_set_threshold(rps, &rps->power.down_threshold, threshold);
+}
+
+static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;
+
+	/* Allow punit to process software requests */
+	intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
+}
+
+void intel_rps_raise_unslice(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	mutex_lock(&rps->lock);
+
+	if (rps_uses_slpc(rps)) {
+		/* RP limits have not been initialized yet for SLPC path */
+		struct intel_rps_freq_caps caps;
+
+		gen6_rps_get_freq_caps(rps, &caps);
+
+		intel_rps_set_manual(rps, true);
+		intel_uncore_write(uncore, GEN6_RPNSWREQ,
+				   ((caps.rp0_freq <<
+				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
+				   GEN9_IGNORE_SLICE_RATIO));
+		intel_rps_set_manual(rps, false);
+	} else {
+		intel_rps_set(rps, rps->rp0_freq);
+	}
+
+	mutex_unlock(&rps->lock);
+}
+
+void intel_rps_lower_unslice(struct intel_rps *rps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+
+	mutex_lock(&rps->lock);
+
+	if (rps_uses_slpc(rps)) {
+		/* RP limits have not been initialized yet for SLPC path */
+		struct intel_rps_freq_caps caps;
+
+		gen6_rps_get_freq_caps(rps, &caps);
+
+		intel_rps_set_manual(rps, true);
+		intel_uncore_write(uncore, GEN6_RPNSWREQ,
+				   ((caps.min_freq <<
+				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
+				   GEN9_IGNORE_SLICE_RATIO));
+		intel_rps_set_manual(rps, false);
+	} else {
+		intel_rps_set(rps, rps->min_freq);
+	}
+
+	mutex_unlock(&rps->lock);
+}
+
+static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
+{
+	struct intel_gt *gt = rps_to_gt(rps);
+	intel_wakeref_t wakeref;
+	u32 val;
+
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+		val = intel_uncore_read(gt->uncore, reg32);
+
+	return val;
+}
+
+bool rps_read_mask_mmio(struct intel_rps *rps,
+			i915_reg_t reg32, u32 mask)
+{
+	return rps_read_mmio(rps, reg32) & mask;
+}
+
 /* External interface for intel_ips.ko */

 static struct drm_i915_private __rcu *ips_mchdev;

-/**
+/*
  * Tells the intel_ips driver that the i915 driver is now loaded, if
  * IPS got loaded first.
  *
@@ -2012,7 +2799,7 @@ unsigned long i915_read_mch_val(void)
 		return 0;

 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		struct intel_ips *ips = &i915->gt.rps.ips;
+		struct intel_ips *ips = &to_gt(i915)->rps.ips;

 		spin_lock_irq(&mchdev_lock);
 		chipset_val = __ips_chipset_val(ips);
@@ -2039,7 +2826,7 @@ bool i915_gpu_raise(void)
 	if (!i915)
 		return false;

-	rps = &i915->gt.rps;
+	rps = &to_gt(i915)->rps;

 	spin_lock_irq(&mchdev_lock);
 	if (rps->max_freq_softlimit < rps->max_freq)
@@ -2066,7 +2853,7 @@ bool i915_gpu_lower(void)
 	if (!i915)
 		return false;

-	rps = &i915->gt.rps;
+	rps = &to_gt(i915)->rps;

 	spin_lock_irq(&mchdev_lock);
 	if (rps->max_freq_softlimit > rps->min_freq)
@@ -2092,7 +2879,7 @@ bool i915_gpu_busy(void)
 	if (!i915)
 		return false;

-	ret = i915->gt.awake;
+	ret = to_gt(i915)->awake;

 	drm_dev_put(&i915->drm);

 	return ret;
@@ -2115,11 +2902,11 @@ bool i915_gpu_turbo_disable(void)
 	if (!i915)
 		return false;

-	rps = &i915->gt.rps;
+	rps = &to_gt(i915)->rps;

 	spin_lock_irq(&mchdev_lock);
 	rps->max_freq_softlimit = rps->min_freq;
-	ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
+	ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
 	spin_unlock_irq(&mchdev_lock);

 	drm_dev_put(&i915->drm);
@@ -2129,4 +2916,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);

 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_rps.c"
+#include "selftest_slpc.c"
 #endif
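
A few of the mechanical conversions in this diff recur across the kernel and are worth a brief illustration. The first is the from_timer() -> timer_container_of() rename in rps_timer(): both macros recover the structure that embeds the timer_list via container_of(). A minimal sketch of that pattern; "example_state" and "example_timer_fn" are illustrative names, not code from this file:

#include <linux/container_of.h>
#include <linux/jiffies.h>
#include <linux/timer.h>

struct example_state {
	struct timer_list timer;	/* embedded timer */
	unsigned int ticks;
};

static void example_timer_fn(struct timer_list *t)
{
	/* What timer_container_of(state, t, timer) does, in spirit. */
	struct example_state *state =
		container_of(t, struct example_state, timer);

	state->ticks++;
	mod_timer(&state->timer, jiffies + HZ);	/* re-arm in ~1s */
}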
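The second recurring conversion replaces schedule_work(), which targets the shared system workqueue, with queue_work() on a workqueue the driver owns (i915->unordered_wq in this diff). A hedged sketch of that shape, with "example_wq" and "example_work_fn" as made-up names:

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static void example_work_fn(struct work_struct *work)
{
	/* deferred processing runs here */
}
static DECLARE_WORK(example_work, example_work_fn);

static int example_setup(void)
{
	example_wq = alloc_workqueue("example-unordered", 0, 0);
	if (!example_wq)
		return -ENOMEM;

	/* Previously: schedule_work(&example_work), i.e. system_wq. */
	queue_work(example_wq, &example_work);
	return 0;
}

Queuing on a driver-private workqueue keeps this deferred work isolated from unrelated system_wq users, which matters when the work must be flushed during reset or suspend.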
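Finally, the CAGF readout in intel_rps_get_cagf() moves from open-coded mask-and-shift to REG_FIELD_GET(), i915's wrapper around the generic FIELD_GET(), which derives the shift from the mask at compile time. A small sketch with an invented field definition (EXAMPLE_CAGF_MASK is not a real register field):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define EXAMPLE_CAGF_MASK	GENMASK(8, 1)	/* illustrative only */

static u32 example_decode_cagf(u32 rpstat)
{
	/* Old style: (rpstat & EXAMPLE_CAGF_MASK) >> 1, shift kept in sync by hand. */
	/* New style: the shift is implied by the mask itself. */
	return FIELD_GET(EXAMPLE_CAGF_MASK, rpstat);
}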
