summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_guc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc.c')
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c354
1 files changed, 272 insertions, 82 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 240e7a4bbff1..0bf6e01b8910 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -19,8 +19,11 @@
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_gt_throttle.h"
#include "xe_guc_ads.h"
#include "xe_guc_ct.h"
+#include "xe_guc_db_mgr.h"
#include "xe_guc_hwconfig.h"
#include "xe_guc_log.h"
#include "xe_guc_pc.h"
@@ -239,7 +242,7 @@ static void guc_write_params(struct xe_guc *guc)
xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
}
-static void guc_fini(struct drm_device *drm, void *arg)
+static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
struct xe_gt *gt = guc_to_gt(guc);
@@ -293,6 +296,23 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
return 0;
}
+static int vf_guc_init(struct xe_guc *guc)
+{
+ int err;
+
+ xe_guc_comm_init_early(guc);
+
+ err = xe_guc_ct_init(&guc->ct);
+ if (err)
+ return err;
+
+ err = xe_guc_relay_init(&guc->relay);
+ if (err)
+ return err;
+
+ return 0;
+}
+
int xe_guc_init(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
@@ -307,6 +327,13 @@ int xe_guc_init(struct xe_guc *guc)
if (!xe_uc_fw_is_enabled(&guc->fw))
return 0;
+ if (IS_SRIOV_VF(xe)) {
+ ret = vf_guc_init(guc);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
ret = xe_guc_log_init(&guc->log);
if (ret)
goto out;
@@ -323,7 +350,7 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
- ret = drmm_add_action_or_reset(&xe->drm, guc_fini, guc);
+ ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc);
if (ret)
goto out;
@@ -340,6 +367,19 @@ out:
return ret;
}
+static int vf_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+ int err;
+
+ err = xe_guc_submit_init(guc, xe_gt_sriov_vf_guc_ids(guc_to_gt(guc)));
+ if (err)
+ return err;
+
+ /* XXX xe_guc_db_mgr_init not needed for now */
+
+ return 0;
+}
+
/**
* xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
* @guc: The GuC object
@@ -350,12 +390,23 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
{
int ret;
+ if (IS_SRIOV_VF(guc_to_xe(guc)))
+ return vf_guc_init_post_hwconfig(guc);
+
ret = xe_guc_realloc_post_hwconfig(guc);
if (ret)
return ret;
guc_init_params_post_hwconfig(guc);
+ ret = xe_guc_submit_init(guc, ~0);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_db_mgr_init(&guc->dbm, ~0);
+ if (ret)
+ return ret;
+
ret = xe_guc_pc_init(&guc->pc);
if (ret)
return ret;
@@ -379,6 +430,9 @@ int xe_guc_reset(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ return xe_gt_sriov_vf_bootstrap(gt);
+
xe_mmio_write32(gt, GDRST, GRDOM_GUC);
ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
@@ -451,58 +505,194 @@ static int guc_xfer_rsa(struct xe_guc *guc)
return 0;
}
-static int guc_wait_ucode(struct xe_guc *guc)
+/*
+ * Check a previously read GuC status register (GUC_STATUS) looking for
+ * known terminal states (either completion or failure) of either the
+ * microkernel status field or the boot ROM status field. Returns +1 for
+ * successful completion, -1 for failure and 0 for any intermediate state.
+ */
+static int guc_load_done(u32 status)
{
- struct xe_gt *gt = guc_to_gt(guc);
- u32 status;
- int ret;
+ u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ switch (uk_val) {
+ case XE_GUC_LOAD_STATUS_READY:
+ return 1;
+
+ case XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
+ case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
+ case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
+ case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR:
+ case XE_GUC_LOAD_STATUS_DPC_ERROR:
+ case XE_GUC_LOAD_STATUS_EXCEPTION:
+ case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+ case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID:
+ case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ return -1;
+ }
+
+ switch (br_val) {
+ case XE_BOOTROM_STATUS_NO_KEY_FOUND:
+ case XE_BOOTROM_STATUS_RSA_FAILED:
+ case XE_BOOTROM_STATUS_PAVPC_FAILED:
+ case XE_BOOTROM_STATUS_WOPCM_FAILED:
+ case XE_BOOTROM_STATUS_LOADLOC_FAILED:
+ case XE_BOOTROM_STATUS_JUMP_FAILED:
+ case XE_BOOTROM_STATUS_RC6CTXCONFIG_FAILED:
+ case XE_BOOTROM_STATUS_MPUMAP_INCORRECT:
+ case XE_BOOTROM_STATUS_EXCEPTION:
+ case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
+ return -1;
+ }
+
+ return 0;
+}
+
+static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
+{
+ u32 freq;
+ int ret = xe_guc_pc_get_cur_freq(guc_pc, &freq);
+
+ return ret ? ret : freq;
+}
+/*
+ * Wait for the GuC to start up.
+ *
+ * Measurements indicate this should take no more than 20ms (assuming the GT
+ * clock is at maximum frequency). However, thermal throttling and other issues
+ * can prevent the clock hitting max and thus making the load take significantly
+ * longer. Allow up to 200ms as a safety margin for real world worst case situations.
+ *
+ * However, bugs anywhere from KMD to GuC to PCODE to fan failure in a CI farm can
+ * lead to even longer times. E.g. if the GT is clamped to minimum frequency then
+ * the load times can be in the seconds range. So the timeout is increased for debug
+ * builds to ensure that problems can be correctly analysed. For release builds, the
+ * timeout is kept short so that users don't wait forever to find out that there is a
+ * problem. In either case, if the load took longer than is reasonable even with some
+ * 'sensible' throttling, then flag a warning because something is not right.
+ *
+ * Note that there is a limit on how long an individual usleep_range() can wait for,
+ * hence longer waits require wrapping a shorter wait in a loop.
+ *
+ * Note that the only reason an end user should hit the shorter timeout is in case of
+ * extreme thermal throttling. And a system that is that hot during boot is probably
+ * dead anyway!
+ */
+#if defined(CONFIG_DRM_XE_DEBUG)
+#define GUC_LOAD_RETRY_LIMIT 20
+#else
+#define GUC_LOAD_RETRY_LIMIT 3
+#endif
+#define GUC_LOAD_TIME_WARN_MS 200
+
+static void guc_wait_ucode(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
+ ktime_t before, after, delta;
+ int load_done;
+ u32 status = 0;
+ int count = 0;
+ u64 delta_ms;
+ u32 before_freq;
+
+ before_freq = xe_guc_pc_get_act_freq(guc_pc);
+ before = ktime_get();
/*
- * Wait for the GuC to start up.
- * NB: Docs recommend not using the interrupt for completion.
- * Measurements indicate this should take no more than 20ms
- * (assuming the GT clock is at maximum frequency). So, a
- * timeout here indicates that the GuC has failed and is unusable.
- * (Higher levels of the driver may decide to reset the GuC and
- * attempt the ucode load again if this happens.)
- *
- * FIXME: There is a known (but exceedingly unlikely) race condition
- * where the asynchronous frequency management code could reduce
- * the GT clock while a GuC reload is in progress (during a full
- * GT reset). A fix is in progress but there are complex locking
- * issues to be resolved. In the meantime bump the timeout to
- * 200ms. Even at slowest clock, this should be sufficient. And
- * in the working case, a larger timeout makes no difference.
+ * Note, can't use any kind of timing information from the call to xe_mmio_wait.
+ * It could return a thousand intermediate stages at random times. Instead, must
+ * manually track the total time taken and locally implement the timeout.
*/
- ret = xe_mmio_wait32(gt, GUC_STATUS, GS_UKERNEL_MASK,
- FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY),
- 200000, &status, false);
+ do {
+ u32 last_status = status & (GS_UKERNEL_MASK | GS_BOOTROM_MASK);
+ int ret;
- if (ret) {
- xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status);
- xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status),
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
-
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
- xe_gt_info(gt, "GuC firmware signature verification failed\n");
- ret = -ENOEXEC;
+ /*
+ * Wait for any change (intermediate or terminal) in the status register.
+ * Note, the return value is a don't care. The only failure code is timeout
+ * but the timeouts need to be accumulated over all the intermediate partial
+ * timeouts rather than allowing a huge timeout each time. So basically, need
+ * to treat a timeout no different to a value change.
+ */
+ ret = xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
+ last_status, 1000 * 1000, &status, false);
+ if (ret < 0)
+ count++;
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+
+ load_done = guc_load_done(status);
+ if (load_done != 0)
+ break;
+
+ if (delta_ms >= (GUC_LOAD_RETRY_LIMIT * 1000))
+ break;
+
+ xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
+ count, xe_guc_pc_get_act_freq(guc_pc),
+ guc_pc_get_cur_freq(guc_pc), status,
+ REG_FIELD_GET(GS_BOOTROM_MASK, status),
+ REG_FIELD_GET(GS_UKERNEL_MASK, status));
+ } while (1);
+
+ if (load_done != 1) {
+ u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n",
+ status, delta_ms, xe_guc_pc_get_act_freq(guc_pc),
+ guc_pc_get_cur_freq(guc_pc), load_done);
+ xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ bootrom, ukernel,
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ switch (bootrom) {
+ case XE_BOOTROM_STATUS_NO_KEY_FOUND:
+ xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
+ xe_mmio_read32(gt, GUC_HEADER_INFO));
+ break;
+
+ case XE_BOOTROM_STATUS_RSA_FAILED:
+ xe_gt_err(gt, "firmware signature verification failed\n");
+ break;
+
+ case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
+ xe_gt_err(gt, "firmware production part check failure\n");
+ break;
}
- if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
- XE_GUC_LOAD_STATUS_EXCEPTION) {
- xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n",
- xe_mmio_read32(gt, SOFT_SCRATCH(13)));
- ret = -ENXIO;
+ switch (ukernel) {
+ case XE_GUC_LOAD_STATUS_EXCEPTION:
+ xe_gt_err(gt, "firmware exception. EIP: %#x\n",
+ xe_mmio_read32(gt, SOFT_SCRATCH(13)));
+ break;
+
+ case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ xe_gt_err(gt, "illegal register in save/restore workaround list\n");
+ break;
+
+ case XE_GUC_LOAD_STATUS_HWCONFIG_START:
+ xe_gt_err(gt, "still extracting hwconfig table.\n");
+ break;
}
+
+ xe_device_declare_wedged(gt_to_xe(gt));
+ } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) {
+ xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n",
+ delta_ms, status, count);
+ xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+ xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
+ before_freq, xe_gt_throttle_get_limit_reasons(gt));
} else {
- xe_gt_dbg(gt, "GuC successfully loaded\n");
+ xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n",
+ delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
+ before_freq, status, count);
}
-
- return ret;
}
static int __xe_guc_upload(struct xe_guc *guc)
@@ -532,9 +722,7 @@ static int __xe_guc_upload(struct xe_guc *guc)
goto out;
/* Wait for authentication */
- ret = guc_wait_ucode(guc);
- if (ret)
- goto out;
+ guc_wait_ucode(guc);
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
return 0;
@@ -544,6 +732,38 @@ out:
return 0 /* FIXME: ret, don't want to stop load currently */;
}
+static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ ret = xe_gt_sriov_vf_bootstrap(gt);
+ if (ret)
+ return ret;
+
+ ret = xe_gt_sriov_vf_query_config(gt);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_hwconfig_init(guc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_enable_communication(guc);
+ if (ret)
+ return ret;
+
+ ret = xe_gt_sriov_vf_connect(gt);
+ if (ret)
+ return ret;
+
+ ret = xe_gt_sriov_vf_query_runtime(gt);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
/**
* xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table
* @guc: The GuC object
@@ -559,6 +779,9 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
{
int ret;
+ if (IS_SRIOV_VF(guc_to_xe(guc)))
+ return vf_guc_min_load_for_hwconfig(guc);
+
xe_guc_ads_populate_minimal(&guc->ads);
/* Raise GT freq to speed up HuC/GuC load */
@@ -891,17 +1114,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc)
XE_WARN_ON(xe_guc_pc_stop(&guc->pc));
}
-int xe_guc_stop(struct xe_guc *guc)
+void xe_guc_stop(struct xe_guc *guc)
{
- int ret;
-
xe_guc_ct_stop(&guc->ct);
- ret = xe_guc_submit_stop(guc);
- if (ret)
- return ret;
-
- return 0;
+ xe_guc_submit_stop(guc);
}
int xe_guc_start(struct xe_guc *guc)
@@ -950,30 +1167,3 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
xe_guc_ct_print(&guc->ct, p, false);
xe_guc_submit_print(guc, p);
}
-
-/**
- * xe_guc_in_reset() - Detect if GuC MIA is in reset.
- * @guc: The GuC object
- *
- * This function detects runtime resume from d3cold by leveraging
- * GUC_STATUS, GUC doesn't get reset during d3hot,
- * it strictly to be called from RPM resume handler.
- *
- * Return: true if failed to get forcewake or GuC MIA is in Reset,
- * otherwise false.
- */
-bool xe_guc_in_reset(struct xe_guc *guc)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- u32 status;
- int err;
-
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return true;
-
- status = xe_mmio_read32(gt, GUC_STATUS);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-
- return status & GS_MIA_IN_RESET;
-}