summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_rps.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_rps.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c804
1 files changed, 621 insertions, 183 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 54e7df788dbf..b01c837ab646 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -3,17 +3,30 @@
* Copyright © 2019 Intel Corporation
*/
-#include <drm/i915_drm.h>
+#include <linux/string_helpers.h>
+
+#include <drm/intel/i915_drm.h>
+
+#include "display/intel_display_rps.h"
+#include "display/vlv_clock.h"
+#include "soc/intel_dram.h"
#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
+#include "intel_gt_pm.h"
#include "intel_gt_pm_irq.h"
+#include "intel_gt_print.h"
+#include "intel_gt_regs.h"
+#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_rps.h"
-#include "vlv_sideband.h"
+#include "vlv_iosf_sb.h"
#include "../../../platform/x86/intel_ips.h"
#define BUSY_MAX_EI 20u /* ms */
@@ -42,7 +55,7 @@ static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- return &gt->uc.guc.slpc;
+ return &gt_to_guc(gt)->slpc;
}
static bool rps_uses_slpc(struct intel_rps *rps)
@@ -64,14 +77,15 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
static void rps_timer(struct timer_list *t)
{
- struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_rps *rps = timer_container_of(rps, t, timer);
+ struct intel_gt *gt = rps_to_gt(rps);
struct intel_engine_cs *engine;
ktime_t dt, last, timestamp;
enum intel_engine_id id;
s64 max_busy[3] = {};
timestamp = 0;
- for_each_engine(engine, rps_to_gt(rps), id) {
+ for_each_engine(engine, gt, id) {
s64 busy;
int i;
@@ -115,7 +129,7 @@ static void rps_timer(struct timer_list *t)
busy += div_u64(max_busy[i], 1 << i);
}
- GT_TRACE(rps_to_gt(rps),
+ GT_TRACE(gt,
"busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
busy, (int)div64_u64(100 * busy, dt),
max_busy[0], max_busy[1], max_busy[2],
@@ -125,12 +139,12 @@ static void rps_timer(struct timer_list *t)
rps->cur_freq < rps->max_freq_softlimit) {
rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
rps->pm_interval = 1;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
} else if (100 * busy < rps->power.down_threshold * dt &&
rps->cur_freq > rps->min_freq_softlimit) {
rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
rps->pm_interval = 1;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
} else {
rps->last_adj = 0;
}
@@ -150,7 +164,7 @@ static void rps_start_timer(struct intel_rps *rps)
static void rps_stop_timer(struct intel_rps *rps)
{
- del_timer_sync(&rps->timer);
+ timer_delete_sync(&rps->timer);
rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
cancel_work_sync(&rps->work);
}
@@ -189,9 +203,9 @@ static void rps_enable_interrupts(struct intel_rps *rps)
rps_reset_ei(rps);
- spin_lock_irq(&gt->irq_lock);
+ spin_lock_irq(gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
- spin_unlock_irq(&gt->irq_lock);
+ spin_unlock_irq(gt->irq_lock);
intel_uncore_write(gt->uncore,
GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
@@ -212,14 +226,14 @@ static void rps_reset_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- spin_lock_irq(&gt->irq_lock);
+ spin_lock_irq(gt->irq_lock);
if (GRAPHICS_VER(gt->i915) >= 11)
gen11_rps_reset_interrupts(rps);
else
gen6_rps_reset_interrupts(rps);
rps->pm_iir = 0;
- spin_unlock_irq(&gt->irq_lock);
+ spin_unlock_irq(gt->irq_lock);
}
static void rps_disable_interrupts(struct intel_rps *rps)
@@ -229,9 +243,9 @@ static void rps_disable_interrupts(struct intel_rps *rps)
intel_uncore_write(gt->uncore,
GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
- spin_lock_irq(&gt->irq_lock);
+ spin_lock_irq(gt->irq_lock);
gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
- spin_unlock_irq(&gt->irq_lock);
+ spin_unlock_irq(gt->irq_lock);
intel_synchronize_irq(gt->i915);
@@ -254,10 +268,10 @@ static const struct cparams {
u16 c;
} cparams[] = {
{ 1, 1333, 301, 28664 },
- { 1, 1066, 294, 24460 },
+ { 1, 1067, 294, 24460 },
{ 1, 800, 294, 25192 },
{ 0, 1333, 276, 27605 },
- { 0, 1066, 276, 27605 },
+ { 0, 1067, 276, 27605 },
{ 0, 800, 231, 23784 },
};
@@ -265,19 +279,24 @@ static void gen5_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ unsigned int fsb_freq, mem_freq;
u8 fmax, fmin, fstart;
u32 rgvmodectl;
int c_m, i;
- if (i915->fsb_freq <= 3200)
+ fsb_freq = intel_fsb_freq(i915);
+ mem_freq = intel_mem_freq(i915);
+
+ if (fsb_freq <= 3200000)
c_m = 0;
- else if (i915->fsb_freq <= 4800)
+ else if (fsb_freq <= 4800000)
c_m = 1;
else
c_m = 2;
for (i = 0; i < ARRAY_SIZE(cparams); i++) {
- if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+ if (cparams[i].i == c_m &&
+ cparams[i].t == DIV_ROUND_CLOSEST(mem_freq, 1000)) {
rps->ips.m = cparams[i].m;
rps->ips.c = cparams[i].c;
break;
@@ -425,7 +444,8 @@ static int __gen5_rps_set(struct intel_rps *rps, u8 val)
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
if (rgvswctl & MEMCTL_CMD_STS) {
- DRM_DEBUG("gpu busy, RCS change rejected\n");
+ drm_dbg(&rps_to_i915(rps)->drm,
+ "gpu busy, RCS change rejected\n");
return -EBUSY; /* still busy with another command */
}
@@ -537,6 +557,7 @@ static unsigned int init_emon(struct intel_uncore *uncore)
static bool gen5_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_display *display = i915->display;
struct intel_uncore *uncore = rps_to_uncore(rps);
u8 fstart, vstart;
u32 rgvmodectl;
@@ -594,9 +615,7 @@ static bool gen5_rps_enable(struct intel_rps *rps)
rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
rps->ips.last_time2 = ktime_get_raw_ns();
- spin_lock(&i915->irq_lock);
- ilk_enable_display_irq(i915, DE_PCU_EVENT);
- spin_unlock(&i915->irq_lock);
+ ilk_display_rps_enable(display);
spin_unlock_irq(&mchdev_lock);
@@ -608,21 +627,18 @@ static bool gen5_rps_enable(struct intel_rps *rps)
static void gen5_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_display *display = i915->display;
struct intel_uncore *uncore = rps_to_uncore(rps);
u16 rgvswctl;
spin_lock_irq(&mchdev_lock);
- spin_lock(&i915->irq_lock);
- ilk_disable_display_irq(i915, DE_PCU_EVENT);
- spin_unlock(&i915->irq_lock);
+ ilk_display_rps_disable(display);
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
/* Ack interrupts, disable EFC interrupt */
- intel_uncore_write(uncore, MEMINTREN,
- intel_uncore_read(uncore, MEMINTREN) &
- ~MEMINT_EVAL_CHG_EN);
+ intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
/* Go back to the starting frequency */
@@ -664,7 +680,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
{
struct intel_gt *gt = rps_to_gt(rps);
struct intel_uncore *uncore = gt->uncore;
- u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
lockdep_assert_held(&rps->power.mutex);
@@ -672,9 +687,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
- threshold_up = 95;
- threshold_down = 85;
-
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
@@ -701,17 +713,22 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
GT_TRACE(gt,
"changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
- new_power, threshold_up, ei_up, threshold_down, ei_down);
+ new_power,
+ rps->power.up_threshold, ei_up,
+ rps->power.down_threshold, ei_down);
set(uncore, GEN6_RP_UP_EI,
intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
+ intel_gt_ns_to_pm_interval(gt,
+ ei_up * rps->power.up_threshold * 10));
set(uncore, GEN6_RP_DOWN_EI,
intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
+ intel_gt_ns_to_pm_interval(gt,
+ ei_down *
+ rps->power.down_threshold * 10));
set(uncore, GEN6_RP_CONTROL,
(GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
@@ -723,8 +740,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
skip_hw_write:
rps->power.mode = new_power;
- rps->power.up_threshold = threshold_up;
- rps->power.down_threshold = threshold_down;
}
static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
@@ -769,7 +784,8 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
- GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
+ GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n",
+ str_yes_no(interactive));
mutex_lock(&rps->power.mutex);
if (interactive) {
@@ -811,9 +827,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
struct drm_i915_private *i915 = rps_to_i915(rps);
int err;
- vlv_punit_get(i915);
- err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
- vlv_punit_put(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+ err = vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_REQ, val);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
val, intel_gpu_freq(rps, val));
@@ -965,7 +981,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
}
mutex_unlock(&rps->lock);
if (boost)
- schedule_work(&rps->work);
+ queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
return 0;
}
@@ -990,6 +1006,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
+ /* Don't decrement num_waiters for req where increment was skipped */
+ if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
+ return;
+
intel_guc_slpc_dec_waiters(slpc);
} else {
atomic_dec(&rps->num_waiters);
@@ -1003,6 +1023,10 @@ void intel_rps_boost(struct i915_request *rq)
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
return;
+ /* Waitboost is not needed for contexts marked with a Freq hint */
+ if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
+ return;
+
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
@@ -1010,9 +1034,24 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
+ /* Waitboost should not be done with power saving profile */
+ if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
+ return;
+
/* Return if old value is non zero */
- if (!atomic_fetch_inc(&slpc->num_waiters))
- schedule_work(&slpc->boost_work);
+ if (!atomic_fetch_inc(&slpc->num_waiters)) {
+ /*
+ * Skip queuing boost work if frequency is already boosted,
+ * but still increment num_waiters.
+ */
+ if (slpc->min_freq_softlimit >= slpc->boost_freq)
+ return;
+
+ GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+ rq->fence.context, rq->fence.seqno);
+ queue_work(rps_to_gt(rps)->i915->unordered_wq,
+ &slpc->boost_work);
+ }
return;
}
@@ -1027,7 +1066,7 @@ void intel_rps_boost(struct i915_request *rq)
rq->fence.context, rq->fence.seqno);
if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
- schedule_work(&rps->work);
+ queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
}
@@ -1064,24 +1103,98 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
return 0;
}
-static void gen6_rps_init(struct intel_rps *rps)
+static u32 intel_rps_read_state_cap(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- u32 rp_state_cap = intel_rps_read_state_cap(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
- /* All of these values are in units of 50MHz */
+ if (IS_GEN9_LP(i915))
+ return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ else
+ return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+}
+
+static void
+mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
+ intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
+ intel_uncore_read(uncore, MTL_RP_STATE_CAP);
+ u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
+ intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
+ intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
+
+ /* MTL values are in units of 16.67 MHz */
+ caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
+ caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
+ caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
+}
+
+static void
+__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 rp_state_cap;
+
+ rp_state_cap = intel_rps_read_state_cap(rps);
/* static values from HW: RP0 > RP1 > RPn (min_freq) */
if (IS_GEN9_LP(i915)) {
- rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
- rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
- rps->min_freq = (rp_state_cap >> 0) & 0xff;
+ caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+ caps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ caps->min_freq = (rp_state_cap >> 0) & 0xff;
} else {
- rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
- rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
- rps->min_freq = (rp_state_cap >> 16) & 0xff;
+ caps->rp0_freq = (rp_state_cap >> 0) & 0xff;
+ if (GRAPHICS_VER(i915) >= 10)
+ caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
+ intel_uncore_read(to_gt(i915)->uncore,
+ GEN10_FREQ_INFO_REC));
+ else
+ caps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ caps->min_freq = (rp_state_cap >> 16) & 0xff;
}
+ if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
+ /*
+ * In this case rp_state_cap register reports frequencies in
+ * units of 50 MHz. Convert these to the actual "hw unit", i.e.
+ * units of 16.67 MHz
+ */
+ caps->rp0_freq *= GEN9_FREQ_SCALER;
+ caps->rp1_freq *= GEN9_FREQ_SCALER;
+ caps->min_freq *= GEN9_FREQ_SCALER;
+ }
+}
+
+/**
+ * gen6_rps_get_freq_caps - Get freq caps exposed by HW
+ * @rps: the intel_rps structure
+ * @caps: returned freq caps
+ *
+ * Returned "caps" frequencies should be converted to MHz using
+ * intel_gpu_freq()
+ */
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ return mtl_get_freq_caps(rps, caps);
+ else
+ return __gen6_rps_get_freq_caps(rps, caps);
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_rps_freq_caps caps;
+
+ gen6_rps_get_freq_caps(rps, &caps);
+ rps->rp0_freq = caps.rp0_freq;
+ rps->rp1_freq = caps.rp1_freq;
+ rps->min_freq = caps.min_freq;
+
/* hw_max = RP0 until we check for overclocking */
rps->max_freq = rps->rp0_freq;
@@ -1089,27 +1202,19 @@ static void gen6_rps_init(struct intel_rps *rps)
if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
u32 ddcc_status = 0;
+ u32 mult = 1;
- if (sandybridge_pcode_read(i915,
- HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
- &ddcc_status, NULL) == 0)
+ if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
+ mult = GEN9_FREQ_SCALER;
+ if (snb_pcode_read(rps_to_gt(rps)->uncore,
+ HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+ &ddcc_status, NULL) == 0)
rps->efficient_freq =
- clamp_t(u8,
- (ddcc_status >> 8) & 0xff,
+ clamp_t(u32,
+ ((ddcc_status >> 8) & 0xff) * mult,
rps->min_freq,
rps->max_freq);
}
-
- if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
- /* Store the frequency values in 16.66 MHZ units, which is
- * the natural hardware unit for SKL
- */
- rps->rp0_freq *= GEN9_FREQ_SCALER;
- rps->rp1_freq *= GEN9_FREQ_SCALER;
- rps->min_freq *= GEN9_FREQ_SCALER;
- rps->max_freq *= GEN9_FREQ_SCALER;
- rps->efficient_freq *= GEN9_FREQ_SCALER;
- }
}
static bool rps_reset(struct intel_rps *rps)
@@ -1182,7 +1287,7 @@ static int chv_rps_max_freq(struct intel_rps *rps)
struct intel_gt *gt = rps_to_gt(rps);
u32 val;
- val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMAX_AT_VMAX_FUSE);
switch (gt->info.sseu.eu_total) {
case 8:
@@ -1209,7 +1314,7 @@ static int chv_rps_rpe_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_GPU_DUTYCYCLE_REG);
val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
@@ -1220,7 +1325,7 @@ static int chv_rps_guar_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMAX_AT_VMAX_FUSE);
return val & FB_GFX_FREQ_FUSE_MASK;
}
@@ -1230,7 +1335,7 @@ static u32 chv_rps_min_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMIN_AT_VMIN_FUSE);
val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
return val & FB_GFX_FREQ_FUSE_MASK;
@@ -1264,20 +1369,21 @@ static bool chv_rps_enable(struct intel_rps *rps)
GEN6_PM_RP_DOWN_TIMEOUT);
/* Setting Fixed Bias */
- vlv_punit_get(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
- vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+ vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, VLV_TURBO_SOC_OVERRIDE, val);
- val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
/* RPS code assumes GPLL is used */
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n",
+ str_yes_no(val & GPLLENABLE));
drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
@@ -1288,7 +1394,7 @@ static int vlv_rps_guar_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val, rp1;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FREQ_FUSE);
rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
@@ -1301,7 +1407,7 @@ static int vlv_rps_max_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val, rp0;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FREQ_FUSE);
rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
/* Clamp to max */
@@ -1315,9 +1421,9 @@ static int vlv_rps_rpe_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val, rpe;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
return rpe;
@@ -1328,7 +1434,7 @@ static int vlv_rps_min_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_LFM) & 0xff;
/*
* According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
* for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
@@ -1364,21 +1470,22 @@ static bool vlv_rps_enable(struct intel_rps *rps)
/* WaGsvRC0ResidencyMethod:vlv */
rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
- vlv_punit_get(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
/* Setting Fixed Bias */
val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
- vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+ vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, VLV_TURBO_SOC_OVERRIDE, val);
- val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
/* RPS code assumes GPLL is used */
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n",
+ str_yes_no(val & GPLLENABLE));
drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
@@ -1471,10 +1578,12 @@ void intel_rps_enable(struct intel_rps *rps)
return;
GT_TRACE(rps_to_gt(rps),
- "min:%x, max:%x, freq:[%d, %d]\n",
+ "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n",
rps->min_freq, rps->max_freq,
intel_gpu_freq(rps, rps->min_freq),
- intel_gpu_freq(rps, rps->max_freq));
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->power.up_threshold,
+ rps->power.down_threshold);
GEM_BUG_ON(rps->max_freq < rps->min_freq);
GEM_BUG_ON(rps->idle_freq > rps->max_freq);
@@ -1484,7 +1593,7 @@ void intel_rps_enable(struct intel_rps *rps)
if (has_busy_stats(rps))
intel_rps_set_timer(rps);
- else if (GRAPHICS_VER(i915) >= 6)
+ else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
intel_rps_set_interrupts(rps);
else
/* Ironlake currently uses intel_ips.ko */ {}
@@ -1501,6 +1610,9 @@ void intel_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ if (!intel_rps_is_enabled(rps))
+ return;
+
intel_rps_clear_enabled(rps);
intel_rps_clear_interrupts(rps);
intel_rps_clear_timer(rps);
@@ -1578,10 +1690,7 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- rps->gpll_ref_freq =
- vlv_get_cck_clock(i915, "GPLL ref",
- CCK_GPLL_CLOCK_CONTROL,
- i915->czclk_freq);
+ rps->gpll_ref_freq = vlv_clock_get_gpll(&i915->drm);
drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
rps->gpll_ref_freq);
@@ -1590,30 +1699,14 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
static void vlv_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- u32 val;
- vlv_iosf_sb_get(i915,
+ vlv_init_gpll_ref_freq(rps);
+
+ vlv_iosf_sb_get(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
- vlv_init_gpll_ref_freq(rps);
-
- val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
- switch ((val >> 6) & 3) {
- case 0:
- case 1:
- i915->mem_freq = 800;
- break;
- case 2:
- i915->mem_freq = 1066;
- break;
- case 3:
- i915->mem_freq = 1333;
- break;
- }
- drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
-
rps->max_freq = vlv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
@@ -1631,7 +1724,7 @@ static void vlv_rps_init(struct intel_rps *rps)
drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
- vlv_iosf_sb_put(i915,
+ vlv_iosf_sb_put(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
@@ -1640,27 +1733,14 @@ static void vlv_rps_init(struct intel_rps *rps)
static void chv_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- u32 val;
- vlv_iosf_sb_get(i915,
+ vlv_init_gpll_ref_freq(rps);
+
+ vlv_iosf_sb_get(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
- vlv_init_gpll_ref_freq(rps);
-
- val = vlv_cck_read(i915, CCK_FUSE_REG);
-
- switch ((val >> 2) & 0x7) {
- case 3:
- i915->mem_freq = 2000;
- break;
- default:
- i915->mem_freq = 1600;
- break;
- }
- drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
-
rps->max_freq = chv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
@@ -1678,7 +1758,7 @@ static void chv_rps_init(struct intel_rps *rps)
drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
- vlv_iosf_sb_put(i915,
+ vlv_iosf_sb_put(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
@@ -1697,6 +1777,7 @@ static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
const struct intel_rps_ei *prev = &rps->ei;
struct intel_rps_ei now;
@@ -1713,7 +1794,7 @@ static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
time = ktime_us_delta(now.ktime, prev->ktime);
- time *= rps_to_i915(rps)->czclk_freq;
+ time *= vlv_clock_get_czclk(&i915->drm);
/* Workload can be split between render + media,
* e.g. SwapBuffers being blitted in X after being rendered in
@@ -1744,10 +1825,10 @@ static void rps_work(struct work_struct *work)
int new_freq, adj, min, max;
u32 pm_iir = 0;
- spin_lock_irq(&gt->irq_lock);
+ spin_lock_irq(gt->irq_lock);
pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
- spin_unlock_irq(&gt->irq_lock);
+ spin_unlock_irq(gt->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
if (!pm_iir && !client_boost)
@@ -1770,7 +1851,7 @@ static void rps_work(struct work_struct *work)
GT_TRACE(gt,
"pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
- pm_iir, yesno(client_boost),
+ pm_iir, str_yes_no(client_boost),
adj, new_freq, min, max);
if (client_boost && new_freq < rps->boost_freq) {
@@ -1820,9 +1901,9 @@ static void rps_work(struct work_struct *work)
mutex_unlock(&rps->lock);
out:
- spin_lock_irq(&gt->irq_lock);
+ spin_lock_irq(gt->irq_lock);
gen6_gt_pm_unmask_irq(gt, rps->pm_events);
- spin_unlock_irq(&gt->irq_lock);
+ spin_unlock_irq(gt->irq_lock);
}
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
@@ -1830,7 +1911,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
struct intel_gt *gt = rps_to_gt(rps);
const u32 events = rps->pm_events & pm_iir;
- lockdep_assert_held(&gt->irq_lock);
+ lockdep_assert_held(gt->irq_lock);
if (unlikely(!events))
return;
@@ -1840,7 +1921,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
}
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
@@ -1850,15 +1931,15 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
events = pm_iir & rps->pm_events;
if (events) {
- spin_lock(&gt->irq_lock);
+ spin_lock(gt->irq_lock);
GT_TRACE(gt, "irq events:%x\n", events);
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
- schedule_work(&rps->work);
- spin_unlock(&gt->irq_lock);
+ queue_work(gt->i915->unordered_wq, &rps->work);
+ spin_unlock(gt->irq_lock);
}
if (GRAPHICS_VER(gt->i915) >= 8)
@@ -1868,7 +1949,8 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
- DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
+ drm_dbg(&rps_to_i915(rps)->drm,
+ "Command parser error, pm_iir 0x%08x\n", pm_iir);
}
void gen5_rps_irq_handler(struct intel_rps *rps)
@@ -1934,14 +2016,15 @@ void intel_rps_init(struct intel_rps *rps)
/* Derive initial user preferences/limits from the hardware limits */
rps->max_freq_softlimit = rps->max_freq;
+ rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
rps->min_freq_softlimit = rps->min_freq;
+ rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;
/* After setting max-softlimit, find the overclock max freq */
if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
u32 params = 0;
- sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
- &params, NULL);
+ snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
if (params & BIT(31)) { /* OC supported */
drm_dbg(&i915->drm,
"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
@@ -1951,6 +2034,12 @@ void intel_rps_init(struct intel_rps *rps)
}
}
+ /* Set default thresholds in % */
+ rps->power.up_threshold = 95;
+ rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold;
+ rps->power.down_threshold = 85;
+ rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold;
+
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
@@ -1986,45 +2075,75 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
}
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+u32 intel_rps_read_rpstat(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ i915_reg_t rpstat;
+
+ rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+ return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+}
+
+static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
- if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
- cagf = (rpstat >> 8) & 0xff;
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
+ else if (GRAPHICS_VER(i915) >= 12)
+ cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
+ else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
- cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+ cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
- cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+ cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 6)
- cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+ cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
else
- cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
- MEMSTAT_PSTATE_SHIFT);
+ cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat));
return cagf;
}
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
- if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
- vlv_punit_get(i915);
- freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ /*
+ * For Gen12+ reading freq from HW does not need a forcewake and
+ * registers will return 0 freq when GT is in RC6
+ */
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+ r = MTL_MIRROR_TARGET_WP1;
+ } else if (GRAPHICS_VER(i915) >= 12) {
+ r = GEN12_RPSTAT1;
+ } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+ freq = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
} else if (GRAPHICS_VER(i915) >= 6) {
- freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ r = GEN6_RPSTAT1;
} else {
- freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+ r = MEMSTAT_ILK;
}
+ if (i915_mmio_reg_valid(r))
+ freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
+
return intel_rps_get_cagf(rps, freq);
}
+static u32 read_cagf(struct intel_rps *rps)
+{
+ return __read_cagf(rps, true);
+}
+
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2037,7 +2156,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
return freq;
}
-u32 intel_rps_read_punit_req(struct intel_rps *rps)
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+ return intel_gpu_freq(rps, __read_cagf(rps, false));
+}
+
+static u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2082,6 +2206,31 @@ u32 intel_rps_get_max_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->max_freq_softlimit);
}
+/**
+ * intel_rps_get_max_raw_freq - returns the max frequency in some raw format.
+ * @rps: the intel_rps structure
+ *
+ * Returns the max frequency in a raw format. In newer platforms raw is in
+ * units of 50 MHz.
+ */
+u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
+{
+ struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+ u32 freq;
+
+ if (rps_uses_slpc(rps)) {
+ return DIV_ROUND_CLOSEST(slpc->rp0_freq,
+ GT_FREQUENCY_MULTIPLIER);
+ } else {
+ freq = rps->max_freq;
+ if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
+ /* Convert GT frequency to 50 MHz units */
+ freq /= GEN9_FREQ_SCALER;
+ }
+ return freq;
+ }
+}
+
u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
{
struct intel_guc_slpc *slpc = rps_to_slpc(rps);
@@ -2112,6 +2261,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->min_freq);
}
+static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps_freq_caps caps;
+ u32 rp_state_limits;
+ u32 gt_perf_status;
+ u32 rpmodectl, rpinclimit, rpdeclimit;
+ u32 rpstat, cagf, reqf;
+ u32 rpcurupei, rpcurup, rpprevup;
+ u32 rpcurdownei, rpcurdown, rpprevdown;
+ u32 rpupei, rpupt, rpdownei, rpdownt;
+ u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+
+ rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+ gen6_rps_get_freq_caps(rps, &caps);
+ if (IS_GEN9_LP(i915))
+ gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+ else
+ gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+
+ /* RPSTAT1 is in the GT power well */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+ if (GRAPHICS_VER(i915) >= 9) {
+ reqf >>= 23;
+ } else {
+ reqf &= ~GEN6_TURBO_DISABLE;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ reqf >>= 24;
+ else
+ reqf >>= 25;
+ }
+ reqf = intel_gpu_freq(rps, reqf);
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+ rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ rpstat = intel_rps_read_rpstat(rps);
+ rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+ rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+ rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+ rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+ rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ cagf = intel_rps_read_actual_frequency(rps);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ if (GRAPHICS_VER(i915) >= 11) {
+ pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+ /*
+ * The equivalent to the PM ISR & IIR cannot be read
+ * without affecting the current state of the system
+ */
+ pm_isr = 0;
+ pm_iir = 0;
+ } else if (GRAPHICS_VER(i915) >= 8) {
+ pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+ pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+ pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+ pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+ } else {
+ pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+ pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+ pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+ pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+ }
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ drm_printf(p, "Video Turbo Mode: %s\n",
+ str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ drm_printf(p, "HW control enabled: %s\n",
+ str_yes_no(rpmodectl & GEN6_RP_ENABLE));
+ drm_printf(p, "SW control enabled: %s\n",
+ str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
+
+ drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ pm_ier, pm_imr, pm_mask);
+ if (GRAPHICS_VER(i915) <= 10)
+ drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
+ pm_isr, pm_iir);
+ drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ drm_printf(p, "Render p-state ratio: %d\n",
+ (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+ drm_printf(p, "Render p-state VID: %d\n",
+ gt_perf_status & 0xff);
+ drm_printf(p, "Render p-state limit: %d\n",
+ rp_state_limits & 0xff);
+ drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
+ drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
+ drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
+ drm_printf(p, "CAGF: %dMHz\n", cagf);
+ drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
+ rpcurupei,
+ intel_gt_pm_interval_to_ns(gt, rpcurupei));
+ drm_printf(p, "RP CUR UP: %d (%lldns)\n",
+ rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+ drm_printf(p, "RP PREV UP: %d (%lldns)\n",
+ rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
+ drm_printf(p, "Up threshold: %d%%\n",
+ rps->power.up_threshold);
+ drm_printf(p, "RP UP EI: %d (%lldns)\n",
+ rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+ drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
+ rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+ drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
+ rpcurdownei,
+ intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+ drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
+ rpcurdown,
+ intel_gt_pm_interval_to_ns(gt, rpcurdown));
+ drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
+ rpprevdown,
+ intel_gt_pm_interval_to_ns(gt, rpprevdown));
+ drm_printf(p, "Down threshold: %d%%\n",
+ rps->power.down_threshold);
+ drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
+ rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+ drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
+ rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
+
+ drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.min_freq));
+ drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp1_freq));
+ drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp0_freq));
+ drm_printf(p, "Max overclocked frequency: %dMHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ drm_printf(p, "Current freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+ drm_printf(p, "Actual freq: %d MHz\n", cagf);
+ drm_printf(p, "Idle freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+ drm_printf(p, "Min freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+ drm_printf(p, "Boost freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->boost_freq));
+ drm_printf(p, "Max freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+ drm_printf(p,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+}
+
+static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps_freq_caps caps;
+ u32 pm_mask;
+
+ gen6_rps_get_freq_caps(rps, &caps);
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
+ drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
+ drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
+ drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.min_freq));
+ drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp1_freq));
+ drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp0_freq));
+ drm_printf(p, "Current freq: %d MHz\n",
+ intel_rps_get_requested_frequency(rps));
+ drm_printf(p, "Actual freq: %d MHz\n",
+ intel_rps_read_actual_frequency(rps));
+ drm_printf(p, "Min freq: %d MHz\n",
+ intel_rps_get_min_frequency(rps));
+ drm_printf(p, "Boost freq: %d MHz\n",
+ intel_rps_get_boost_frequency(rps));
+ drm_printf(p, "Max freq: %d MHz\n",
+ intel_rps_get_max_frequency(rps));
+ drm_printf(p,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, caps.rp1_freq));
+}
+
+void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+ if (rps_uses_slpc(rps))
+ return slpc_frequency_dump(rps, p);
+ else
+ return rps_frequency_dump(rps, p);
+}
+
static int set_max_freq(struct intel_rps *rps, u32 val)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2170,6 +2526,31 @@ u32 intel_rps_get_min_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->min_freq_softlimit);
}
+/**
+ * intel_rps_get_min_raw_freq - returns the min frequency in some raw format.
+ * @rps: the intel_rps structure
+ *
+ * Returns the min frequency in a raw format. In newer platforms raw is in
+ * units of 50 MHz.
+ */
+u32 intel_rps_get_min_raw_freq(struct intel_rps *rps)
+{
+ struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+ u32 freq;
+
+ if (rps_uses_slpc(rps)) {
+ return DIV_ROUND_CLOSEST(slpc->min_freq,
+ GT_FREQUENCY_MULTIPLIER);
+ } else {
+ freq = rps->min_freq;
+ if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
+ /* Convert GT frequency to 50 MHz units */
+ freq /= GEN9_FREQ_SCALER;
+ }
+ return freq;
+ }
+}
+
static int set_min_freq(struct intel_rps *rps, u32 val)
{
int ret = 0;
@@ -2213,17 +2594,56 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
return set_min_freq(rps, val);
}
-u32 intel_rps_read_state_cap(struct intel_rps *rps)
+u8 intel_rps_get_up_threshold(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ return rps->power.up_threshold;
+}
- if (IS_XEHPSDV(i915))
- return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
- else if (IS_GEN9_LP(i915))
- return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
- else
- return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val)
+{
+ int ret;
+
+ if (val > 100)
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&rps->lock);
+ if (ret)
+ return ret;
+
+ if (*threshold == val)
+ goto out_unlock;
+
+ *threshold = val;
+
+ /* Force reset. */
+ rps->last_freq = -1;
+ mutex_lock(&rps->power.mutex);
+ rps->power.mode = -1;
+ mutex_unlock(&rps->power.mutex);
+
+ intel_rps_set(rps, clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
+
+out_unlock:
+ mutex_unlock(&rps->lock);
+
+ return ret;
+}
+
+int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold)
+{
+ return rps_set_threshold(rps, &rps->power.up_threshold, threshold);
+}
+
+u8 intel_rps_get_down_threshold(struct intel_rps *rps)
+{
+ return rps->power.down_threshold;
+}
+
+int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold)
+{
+ return rps_set_threshold(rps, &rps->power.down_threshold, threshold);
}
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
@@ -2238,18 +2658,18 @@ static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
void intel_rps_raise_unslice(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
- u32 rp0_unslice_req;
mutex_lock(&rps->lock);
if (rps_uses_slpc(rps)) {
/* RP limits have not been initialized yet for SLPC path */
- rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0)
- & 0xff) * GEN9_FREQ_SCALER;
+ struct intel_rps_freq_caps caps;
+
+ gen6_rps_get_freq_caps(rps, &caps);
intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
- ((rp0_unslice_req <<
+ ((caps.rp0_freq <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
@@ -2263,18 +2683,18 @@ void intel_rps_raise_unslice(struct intel_rps *rps)
void intel_rps_lower_unslice(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
- u32 rpn_unslice_req;
mutex_lock(&rps->lock);
if (rps_uses_slpc(rps)) {
/* RP limits have not been initialized yet for SLPC path */
- rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16)
- & 0xff) * GEN9_FREQ_SCALER;
+ struct intel_rps_freq_caps caps;
+
+ gen6_rps_get_freq_caps(rps, &caps);
intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
- ((rpn_unslice_req <<
+ ((caps.min_freq <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
@@ -2285,11 +2705,29 @@ void intel_rps_lower_unslice(struct intel_rps *rps)
mutex_unlock(&rps->lock);
}
+static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ intel_wakeref_t wakeref;
+ u32 val;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ val = intel_uncore_read(gt->uncore, reg32);
+
+ return val;
+}
+
+bool rps_read_mask_mmio(struct intel_rps *rps,
+ i915_reg_t reg32, u32 mask)
+{
+ return rps_read_mmio(rps, reg32) & mask;
+}
+
/* External interface for intel_ips.ko */
static struct drm_i915_private __rcu *ips_mchdev;
-/**
+/*
* Tells the intel_ips driver that the i915 driver is now loaded, if
* IPS got loaded first.
*