Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_pc.c')
-rw-r--r--	drivers/gpu/drm/xe/xe_guc_pc.c	659
1 file changed, 501 insertions, 158 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index e8b9faeaef64..951a49fb1d3e 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -5,9 +5,16 @@
#include "xe_guc_pc.h"
+#include <linux/cleanup.h>
#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/wait_bit.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <generated/xe_device_wa_oob.h>
#include <generated/xe_wa_oob.h>
#include "abi/guc_actions_slpc_abi.h"
@@ -19,6 +26,7 @@
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_gt_printk.h"
+#include "xe_gt_throttle.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
@@ -38,6 +46,7 @@
#define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
#define RPE_MASK REG_GENMASK(15, 8)
+#define RPA_MASK REG_GENMASK(31, 16)
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define CAGF_MASK REG_GENMASK(19, 11)
@@ -47,6 +56,12 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
+#define BMG_MIN_FREQ 1200
+#define BMG_MERT_FLUSH_FREQ_CAP 2600
+
+#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
+#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
/**
* DOC: GuC Power Conservation (PC)
@@ -66,6 +81,11 @@
* Xe driver enables SLPC with all of its default features and frequency
* selection, which varies per platform.
*
+ * Power profiles add another level of control to SLPC. When the power saving
+ * profile is chosen, SLPC will use conservative thresholds to ramp frequency,
+ * thus saving power. The base profile is the default and ensures balanced
+ * performance for any workload.
+ *
* Render-C States:
* ================
*
@@ -112,25 +132,37 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
static int wait_for_pc_state(struct xe_guc_pc *pc,
- enum slpc_global_state state)
+ enum slpc_global_state target_state,
+ int timeout_ms)
{
- int timeout_us = 5000; /* rought 5ms, but no need for precision */
- int slept, wait = 10;
+ enum slpc_global_state state;
xe_device_assert_mem_access(pc_to_xe(pc));
- for (slept = 0; slept < timeout_us;) {
- if (slpc_shared_data_read(pc, header.global_state) == state)
- return 0;
+ return poll_timeout_us(state = slpc_shared_data_read(pc, header.global_state),
+ state == target_state,
+ 20, timeout_ms * USEC_PER_MSEC, false);
+}
- usleep_range(wait, wait << 1);
- slept += wait;
- wait <<= 1;
- if (slept + wait > timeout_us)
- wait = timeout_us - slept;
- }
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+ const unsigned long timeout = msecs_to_jiffies(30);
+
+ if (!wait_var_event_timeout(&pc->flush_freq_limit,
+ !atomic_read(&pc->flush_freq_limit),
+ timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
- return -ETIMEDOUT;
+static int wait_for_act_freq_max_limit(struct xe_guc_pc *pc, u32 max_limit)
+{
+ u32 freq;
+
+ return poll_timeout_us(freq = xe_guc_pc_get_act_freq(pc),
+ freq <= max_limit,
+ 20, SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, false);
}
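The conversion above relies on poll_timeout_us() from <linux/iopoll.h>, which replaces the old open-coded backoff loop: it evaluates the read expression, tests the condition, sleeps between attempts, and returns 0 on success or -ETIMEDOUT once the budget expires. A minimal sketch of the same pattern, outside this patch (my_read_state() and MY_STATE_READY are hypothetical):

static int wait_for_ready(struct my_dev *dev)
{
	u32 state;

	/* re-read state roughly every 20us; give up after 5ms */
	return poll_timeout_us(state = my_read_state(dev),
			       state == MY_STATE_READY,
			       20, 5 * USEC_PER_MSEC, false);
}

The trailing false mirrors its use in this patch and, going by that usage, selects polling without an initial sleep before the first read.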
static int pc_action_reset(struct xe_guc_pc *pc)
@@ -145,7 +177,7 @@ static int pc_action_reset(struct xe_guc_pc *pc)
int ret;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n",
ERR_PTR(ret));
@@ -163,12 +195,13 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc)
};
int ret;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
/* Blocking here to ensure the results are ready before reading them */
ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n",
ERR_PTR(ret));
@@ -186,11 +219,12 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
};
int ret;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n",
id, value, ERR_PTR(ret));
@@ -207,11 +241,12 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id)
struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
int ret;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe",
ERR_PTR(ret));
@@ -228,7 +263,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
int ret;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n",
mode, ERR_PTR(ret));
return ret;
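Each of these send paths now suppresses the error message when the device is wedged and the CT send came back -ECANCELED, since a wedged device is expected to cancel outstanding sends rather than signal a new failure. The repeated predicate amounts to a small helper; a sketch only, not part of the patch:

static bool pc_send_error_is_expected(struct xe_guc_pc *pc, int ret)
{
	/* a wedged device cancels CT sends, so -ECANCELED is expected there */
	return xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED;
}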
@@ -296,7 +331,7 @@ static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
* Our goal is to have the admin choices respected.
*/
pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
- freq < pc->rpe_freq);
+ freq < xe_guc_pc_get_rpe_freq(pc));
return pc_action_set_param(pc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
@@ -328,54 +363,79 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
freq);
}
-static void mtl_update_rpe_value(struct xe_guc_pc *pc)
+static u32 mtl_get_rpa_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(&gt->mmio, MTL_MPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MPA_FREQUENCY);
else
- reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY);
- pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
+ return decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg));
}
-static void tgl_update_rpe_value(struct xe_guc_pc *pc)
+static u32 mtl_get_rpe_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
u32 reg;
+ if (xe_gt_is_media_type(gt))
+ reg = xe_mmio_read32(&gt->mmio, MTL_MPE_FREQUENCY);
+ else
+ reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY);
+
+ return decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
+}
+
+static u32 pvc_get_rpa_freq(struct xe_guc_pc *pc)
+{
/*
- * For PVC we still need to use fused RP1 as the approximation for RPe
- * For other platforms than PVC we get the resolved RPe directly from
+ * For PVC we still need to use fused RP0 as the approximation for RPa
+ * For platforms other than PVC we get the resolved RPa directly from
* PCODE at a different register
*/
- if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
- else
- reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
- pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
+
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
+ return REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
-static void pc_update_rp_values(struct xe_guc_pc *pc)
+static u32 tgl_get_rpa_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
+ u32 reg;
- if (GRAPHICS_VERx100(xe) >= 1270)
- mtl_update_rpe_value(pc);
- else
- tgl_update_rpe_value(pc);
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
+ return REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static u32 pvc_get_rpe_freq(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
+
+ /*
+ * For PVC we still need to use fused RP1 as the approximation for RPe
+ */
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
+ return REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static u32 tgl_get_rpe_freq(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
/*
- * RPe is decided at runtime by PCODE. In the rare case where that's
- * smaller than the fused min, we will trust the PCODE and use that
- * as our minimum one.
+ * For platforms other than PVC, we get the resolved RPe directly from
+ * PCODE at a different register
*/
- pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
+ return REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
/**
@@ -404,6 +464,30 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
return freq;
}
+static u32 get_cur_freq(struct xe_gt *gt)
+{
+ u32 freq;
+
+ freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
+ freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
+ return decode_freq(freq);
+}
+
+/**
+ * xe_guc_pc_get_cur_freq_fw - With fw held, get requested frequency
+ * @pc: The GuC PC
+ *
+ * Returns: the requested frequency for that GT instance
+ */
+u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ return get_cur_freq(gt);
+}
+
/**
* xe_guc_pc_get_cur_freq - Get Current requested frequency
* @pc: The GuC PC
@@ -421,16 +505,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -ETIMEDOUT;
}
- *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
-
- *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
- *freq = decode_freq(*freq);
+ *freq = get_cur_freq(gt);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
@@ -448,6 +529,25 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
}
/**
+ * xe_guc_pc_get_rpa_freq - Get the RPa freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPa freq.
+ */
+u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (GRAPHICS_VERx100(xe) == 1260)
+ return pvc_get_rpa_freq(pc);
+ else if (GRAPHICS_VERx100(xe) >= 1270)
+ return mtl_get_rpa_freq(pc);
+ else
+ return tgl_get_rpa_freq(pc);
+}
+
+/**
* xe_guc_pc_get_rpe_freq - Get the RPe freq
* @pc: The GuC PC
*
@@ -455,9 +555,17 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
*/
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
{
- pc_update_rp_values(pc);
+ struct xe_device *xe = pc_to_xe(pc);
+ u32 freq;
+
+ if (GRAPHICS_VERx100(xe) == 1260)
+ freq = pvc_get_rpe_freq(pc);
+ else if (GRAPHICS_VERx100(xe) >= 1270)
+ freq = mtl_get_rpe_freq(pc);
+ else
+ freq = tgl_get_rpe_freq(pc);
- return pc->rpe_freq;
+ return freq;
}
/**
@@ -471,6 +579,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
return pc->rpn_freq;
}
+static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
+ int ret;
+
+ lockdep_assert_held(&pc->freq_lock);
+
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ return ret;
+
+ *freq = pc_get_min_freq(pc);
+
+ return 0;
+}
+
/**
* xe_guc_pc_get_min_freq - Get the min operational frequency
* @pc: The GuC PC
@@ -481,38 +608,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
*/
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
- struct xe_gt *gt = pc_to_gt(pc);
- unsigned int fw_ref;
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_min_freq_locked(pc, freq);
+}
+
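The freq accessors are being split into a *_locked body that asserts the mutex plus a thin wrapper that takes it with guard(mutex) from <linux/cleanup.h>; the guard releases the lock automatically on every return path, which is what lets the patch drop the out:/unlock labels below. The shape of the pattern, with hypothetical names:

struct thing {
	struct mutex lock;
	u32 val;
};

static int thing_get_locked(struct thing *t, u32 *val)
{
	lockdep_assert_held(&t->lock);

	*val = t->val;
	return 0;
}

static int thing_get(struct thing *t, u32 *val)
{
	guard(mutex)(&t->lock);	/* unlocked automatically on any return */

	return thing_get_locked(t, val);
}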
+static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- /*
- * GuC SLPC plays with min freq request when GuCRC is enabled
- * Block RC6 for a more reliable read.
- */
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- ret = -ETIMEDOUT;
- goto fw;
- }
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
- ret = pc_action_query_task_state(pc);
+ ret = pc_set_min_freq(pc, freq);
if (ret)
- goto fw;
+ return ret;
- *freq = pc_get_min_freq(pc);
+ pc->user_requested_min = freq;
-fw:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -526,24 +643,28 @@ out:
*/
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_set_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_set_min_freq(pc, freq);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
if (ret)
- goto out;
+ return ret;
- pc->user_requested_min = freq;
+ *freq = pc_get_max_freq(pc);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -556,24 +677,28 @@ out:
*/
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_max_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_action_query_task_state(pc);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_set_max_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_max_freq(pc);
+ pc->user_requested_max = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -587,24 +712,14 @@ out:
*/
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
- int ret;
-
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
+ if (XE_GT_WA(pc_to_gt(pc), 22019338487)) {
+ if (wait_for_flush_complete(pc) != 0)
+ return -EAGAIN;
}
- ret = pc_set_max_freq(pc, freq);
- if (ret)
- goto out;
+ guard(mutex)(&pc->freq_lock);
- pc->user_requested_max = freq;
-
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return xe_guc_pc_set_max_freq_locked(pc, freq);
}
/**
@@ -710,7 +825,7 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
- if (XE_WA(gt, 22019338487)) {
+ if (XE_GT_WA(gt, 22019338487)) {
if (xe_gt_is_media_type(gt))
return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
else
@@ -747,6 +862,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
{
+ struct xe_tile *tile = gt_to_tile(pc_to_gt(pc));
int ret;
lockdep_assert_held(&pc->freq_lock);
@@ -773,6 +889,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
if (pc_get_min_freq(pc) > pc->rp0_freq)
ret = pc_set_min_freq(pc, pc->rp0_freq);
+ if (XE_DEVICE_WA(tile_to_xe(tile), 14022085890))
+ ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc)));
+
out:
return ret;
}
@@ -798,30 +917,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
return ret;
}
-static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
{
- int ret = 0;
+ struct xe_gt *gt = pc_to_gt(pc);
- if (XE_WA(pc_to_gt(pc), 22019338487)) {
- /*
- * Get updated min/max and stash them.
- */
- ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
- if (!ret)
- ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
- if (ret)
- return ret;
+ return XE_GT_WA(gt, 22019338487) &&
+ pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per the WA, reduce max GT frequency during L2 cache flush
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 max_freq;
+ int ret;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ guard(mutex)(&pc->freq_lock);
+
+ ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
+ if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+ ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret) {
+ xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+ return;
+ }
+
+ atomic_set(&pc->flush_freq_limit, 1);
/*
- * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ * If user has previously changed max freq, stash that value to
+ * restore later, otherwise use the current max. New user
+ * requests wait on flush.
*/
- mutex_lock(&pc->freq_lock);
- ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
- if (!ret)
- ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
- mutex_unlock(&pc->freq_lock);
+ if (pc->user_requested_max != 0)
+ pc->stashed_max_freq = pc->user_requested_max;
+ else
+ pc->stashed_max_freq = max_freq;
}
+ /*
+ * Wait for actual freq to go below the flush cap: even if the previous
+ * max was below cap, the current one might still be above it
+ */
+ ret = wait_for_act_freq_max_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret)
+ xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Retrieve the previous GT max frequency value.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret = 0;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ if (!atomic_read(&pc->flush_freq_limit))
+ return;
+
+ mutex_lock(&pc->freq_lock);
+
+ ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
+ if (ret)
+ xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+ pc->stashed_max_freq, ret);
+
+ atomic_set(&pc->flush_freq_limit, 0);
+ mutex_unlock(&pc->freq_lock);
+ wake_up_var(&pc->flush_freq_limit);
+}
+
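These two helpers are meant to bracket an L2 cache flush: apply caps the max frequency and raises flush_freq_limit, so concurrent xe_guc_pc_set_max_freq() callers block in wait_for_flush_complete(), while remove restores the stashed maximum and wakes them through wake_up_var(). The expected pairing at the flush site (the actual caller lives elsewhere in the driver; this is only a sketch):

	xe_guc_pc_apply_flush_freq_limit(pc);	/* cap max freq, wait for act freq to drop */
	/* ... perform the L2 cache flush ... */
	xe_guc_pc_remove_flush_freq_limit(pc);	/* restore max freq, wake blocked waiters */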
+static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ if (!XE_GT_WA(pc_to_gt(pc), 22019338487))
+ return 0;
+
+ guard(mutex)(&pc->freq_lock);
+
+ /*
+ * Get updated min/max and stash them.
+ */
+ ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq);
+ if (!ret)
+ ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq);
+ if (ret)
+ return ret;
+
+ /*
+ * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ */
+ ret = pc_set_min_freq(pc, min(xe_guc_pc_get_rpe_freq(pc), pc_max_freq_cap(pc)));
+ if (!ret)
+ ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
+
return ret;
}
@@ -860,7 +1066,6 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
{
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
- unsigned int fw_ref;
int ret = 0;
if (xe->info.skip_guc_pc)
@@ -870,17 +1075,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (ret)
return ret;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- return -ETIMEDOUT;
- }
-
- xe_gt_idle_disable_c6(gt);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return 0;
+ return xe_gt_idle_disable_c6(gt);
}
/**
@@ -940,8 +1135,6 @@ static int pc_init_freqs(struct xe_guc_pc *pc)
if (ret)
goto out;
- pc_update_rp_values(pc);
-
pc_init_pcode_freq(pc);
/*
@@ -955,6 +1148,72 @@ out:
return ret;
}
+static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val)
+{
+ int ret = 0;
+
+ ret = pc_action_set_param(pc,
+ SLPC_PARAM_STRATEGIES,
+ val);
+
+ return ret;
+}
+
+static const char *power_profile_to_string(struct xe_guc_pc *pc)
+{
+ switch (pc->power_profile) {
+ case SLPC_POWER_PROFILE_BASE:
+ return "base";
+ case SLPC_POWER_PROFILE_POWER_SAVING:
+ return "power_saving";
+ default:
+ return "invalid";
+ }
+}
+
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile)
+{
+ switch (pc->power_profile) {
+ case SLPC_POWER_PROFILE_BASE:
+ sprintf(profile, "[%s] %s\n", "base", "power_saving");
+ break;
+ case SLPC_POWER_PROFILE_POWER_SAVING:
+ sprintf(profile, "%s [%s]\n", "base", "power_saving");
+ break;
+ default:
+ sprintf(profile, "invalid");
+ }
+}
+
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
+{
+ int ret = 0;
+ u32 val;
+
+ if (strncmp("base", buf, strlen("base")) == 0)
+ val = SLPC_POWER_PROFILE_BASE;
+ else if (strncmp("power_saving", buf, strlen("power_saving")) == 0)
+ val = SLPC_POWER_PROFILE_POWER_SAVING;
+ else
+ return -EINVAL;
+
+ guard(mutex)(&pc->freq_lock);
+ xe_pm_runtime_get_noresume(pc_to_xe(pc));
+
+ ret = pc_action_set_param(pc,
+ SLPC_PARAM_POWER_PROFILE,
+ val);
+ if (ret)
+ xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n",
+ val, ERR_PTR(ret));
+ else
+ pc->power_profile = val;
+
+ xe_pm_runtime_put(pc_to_xe(pc));
+
+ return ret;
+}
+
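Because xe_guc_pc_set_power_profile() takes the profile as a string, and its strncmp() prefix match tolerates the trailing newline that sysfs writes carry, it can be fed straight from a store handler. A minimal sketch of such a hookup (power_profile_store() and dev_to_pc() are hypothetical, not part of this patch):

static ssize_t power_profile_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);	/* hypothetical lookup */
	int ret;

	ret = xe_guc_pc_set_power_profile(pc, buf);

	return ret ?: count;
}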
/**
* xe_guc_pc_start - Start GuC's Power Conservation component
* @pc: Xe_GuC_PC instance
@@ -965,12 +1224,13 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
unsigned int fw_ref;
+ ktime_t earlier;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -ETIMEDOUT;
}
@@ -986,17 +1246,29 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
goto out;
}
- memset(pc->bo->vmap.vaddr, 0, size);
+ xe_map_memset(xe, &pc->bo->vmap, 0, 0, size);
slpc_shared_data_write(pc, header.size, size);
+ earlier = ktime_get();
ret = pc_action_reset(pc);
if (ret)
goto out;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
- xe_gt_err(gt, "GuC PC Start failed\n");
- ret = -EIO;
- goto out;
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS)) {
+ xe_gt_warn(gt, "GuC PC start taking longer than normal [freq = %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n",
+ xe_guc_pc_get_act_freq(pc), get_cur_freq(gt),
+ xe_gt_throttle_get_limit_reasons(gt));
+
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_EXTENDED_TIMEOUT_MS)) {
+ xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n");
+ ret = -EIO;
+ goto out;
+ }
+
+ xe_gt_warn(gt, "GuC PC excessive start time: %lldms",
+ ktime_ms_delta(ktime_get(), earlier));
}
ret = pc_init_freqs(pc);
@@ -1014,6 +1286,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
}
ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL);
+ if (ret)
+ goto out;
+
+ /* Enable SLPC Optimized Strategy for compute */
+ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+
+ /* Set cached value of power_profile */
+ ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc));
+ if (unlikely(ret))
+ xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
out:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
@@ -1058,7 +1340,7 @@ static void xe_guc_pc_fini_hw(void *arg)
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
- pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
+ pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc)));
xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref);
}
@@ -1086,11 +1368,72 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
pc->bo = bo;
+ pc->power_profile = SLPC_POWER_PROFILE_BASE;
+
return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc);
}
+
+static const char *pc_get_state_string(struct xe_guc_pc *pc)
+{
+ switch (slpc_shared_data_read(pc, header.global_state)) {
+ case SLPC_GLOBAL_STATE_NOT_RUNNING:
+ return "not running";
+ case SLPC_GLOBAL_STATE_INITIALIZING:
+ return "initializing";
+ case SLPC_GLOBAL_STATE_RESETTING:
+ return "resetting";
+ case SLPC_GLOBAL_STATE_RUNNING:
+ return "running";
+ case SLPC_GLOBAL_STATE_SHUTTING_DOWN:
+ return "shutting down";
+ case SLPC_GLOBAL_STATE_ERROR:
+ return "error";
+ default:
+ return "unknown";
+ }
+}
+
+/**
+ * xe_guc_pc_print - Print GuC's Power Conservation information for debug
+ * @pc: Xe_GuC_PC instance
+ * @p: drm_printer
+ */
+void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p)
+{
+ drm_printf(p, "SLPC Shared Data Header:\n");
+ drm_printf(p, "\tSize: %x\n", slpc_shared_data_read(pc, header.size));
+ drm_printf(p, "\tGlobal State: %s\n", pc_get_state_string(pc));
+
+ if (pc_action_query_task_state(pc))
+ return;
+
+ drm_printf(p, "\nSLPC Tasks Status:\n");
+ drm_printf(p, "\tGTPERF enabled: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_GTPERF_TASK_ENABLED));
+ drm_printf(p, "\tDCC enabled: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_DCC_TASK_ENABLED));
+ drm_printf(p, "\tDCC in use: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_IN_DCC));
+ drm_printf(p, "\tBalancer enabled: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_BALANCER_ENABLED));
+ drm_printf(p, "\tIBC enabled: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_IBC_TASK_ENABLED));
+ drm_printf(p, "\tBalancer IA LMT enabled: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_BALANCER_IA_LMT_ENABLED));
+ drm_printf(p, "\tBalancer IA LMT active: %s\n",
+ str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+ SLPC_BALANCER_IA_LMT_ACTIVE));
+}