summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_gt_sriov_vf.c')
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_vf.c 792
1 files changed, 575 insertions, 217 deletions
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index d3baba50f085..4c73a077d314 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -23,10 +23,19 @@
#include "xe_gt_sriov_vf.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_guc.h"
+#include "xe_guc_ct.h"
#include "xe_guc_hxg_helpers.h"
#include "xe_guc_relay.h"
+#include "xe_guc_submit.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
+#include "xe_tile_sriov_vf.h"
+#include "xe_tlb_inval.h"
#include "xe_uc_fw.h"
#include "xe_wopcm.h"
@@ -46,29 +55,52 @@ static int guc_action_vf_reset(struct xe_guc *guc)
return ret > 0 ? -EPROTO : ret;
}
+#define GUC_RESET_VF_STATE_RETRY_MAX 10
static int vf_reset_guc_state(struct xe_gt *gt)
{
+ unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX;
struct xe_guc *guc = &gt->uc.guc;
int err;
- err = guc_action_vf_reset(guc);
+ do {
+ err = guc_action_vf_reset(guc);
+ if (!err || err != -ETIMEDOUT)
+ break;
+ } while (--retry);
+
if (unlikely(err))
xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err));
return err;
}
+/**
+ * xe_gt_sriov_vf_reset - Reset GuC VF internal state.
+ * @gt: the &xe_gt
+ *
+ * The reset request is sent to the GuC, so this relies on functional
+ * `GuC MMIO based communication`_.
+ *
+ * Return: 0 on success, -ENODEV when uC is not enabled on the device, or
+ * another negative error code on failure.
+ */
+int xe_gt_sriov_vf_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	return xe_device_uc_enabled(xe) ? vf_reset_guc_state(gt) : -ENODEV;
+}
+
static int guc_action_match_version(struct xe_guc *guc,
- u32 wanted_branch, u32 wanted_major, u32 wanted_minor,
- u32 *branch, u32 *major, u32 *minor, u32 *patch)
+ struct xe_uc_fw_version *wanted,
+ struct xe_uc_fw_version *found)
{
u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = {
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
GUC_ACTION_VF2GUC_MATCH_VERSION),
- FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) |
- FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) |
- FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor),
+ FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) |
+ FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) |
+ FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor),
};
u32 response[GUC_MAX_MMIO_MSG_LEN];
int ret;
@@ -82,120 +114,138 @@ static int guc_action_match_version(struct xe_guc *guc,
if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0])))
return -EPROTO;
- *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]);
- *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]);
- *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]);
- *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]);
+ memset(found, 0, sizeof(struct xe_uc_fw_version));
+ found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]);
+ found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]);
+ found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]);
+ found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]);
return 0;
}
-static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor)
+/*
+ * Ask the GuC which interface version it supports by sending a match-version
+ * request with wildcard branch/major/minor; the reply is stored in @found.
+ */
+static int guc_action_match_version_any(struct xe_guc *guc,
+					struct xe_uc_fw_version *found)
+{
+	/* members not listed here (e.g. patch) are zero-initialized */
+	struct xe_uc_fw_version any = {
+		.branch = GUC_VERSION_BRANCH_ANY,
+		.major = GUC_VERSION_MAJOR_ANY,
+		.minor = GUC_VERSION_MINOR_ANY,
+	};
+
+	return guc_action_match_version(guc, &any, found);
+}
+
+static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver)
{
struct xe_device *xe = gt_to_xe(gt);
+ memset(ver, 0, sizeof(struct xe_uc_fw_version));
+
switch (xe->info.platform) {
case XE_TIGERLAKE ... XE_PVC:
/* 1.1 this is current baseline for Xe driver */
- *branch = 0;
- *major = 1;
- *minor = 1;
+ ver->branch = 0;
+ ver->major = 1;
+ ver->minor = 1;
break;
default:
/* 1.2 has support for the GMD_ID KLV */
- *branch = 0;
- *major = 1;
- *minor = 2;
+ ver->branch = 0;
+ ver->major = 1;
+ ver->minor = 2;
break;
}
}
-static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor)
+static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver)
{
/* for now it's the same as minimum */
- return vf_minimum_guc_version(gt, branch, major, minor);
+ return vf_minimum_guc_version(gt, ver);
}
static int vf_handshake_with_guc(struct xe_gt *gt)
{
- struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version;
+ struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
+ struct xe_uc_fw_version wanted = {0};
struct xe_guc *guc = &gt->uc.guc;
- u32 wanted_branch, wanted_major, wanted_minor;
- u32 branch, major, minor, patch;
+ bool old = false;
int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
/* select wanted version - prefer previous (if any) */
if (guc_version->major || guc_version->minor) {
- wanted_branch = guc_version->branch;
- wanted_major = guc_version->major;
- wanted_minor = guc_version->minor;
+ wanted = *guc_version;
+ old = true;
} else {
- vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor);
- xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY);
+ vf_wanted_guc_version(gt, &wanted);
+ xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY);
+
+ /* First time we handshake, so record the minimum wanted */
+ gt->sriov.vf.wanted_guc_version = wanted;
}
- err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor,
- &branch, &major, &minor, &patch);
+ err = guc_action_match_version(guc, &wanted, guc_version);
if (unlikely(err))
goto fail;
- /* we don't support interface version change */
- if ((guc_version->major || guc_version->minor) &&
- (guc_version->branch != branch || guc_version->major != major ||
- guc_version->minor != minor)) {
- xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n",
- branch, major, minor, patch);
- xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n",
- guc_version->branch, guc_version->major,
- guc_version->minor, guc_version->patch);
- err = -EREMCHG;
- goto fail;
+ if (old) {
+ /* we don't support interface version change */
+ if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) {
+ xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n",
+ guc_version->branch, guc_version->major,
+ guc_version->minor, guc_version->patch);
+ xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n",
+ wanted.branch, wanted.major,
+ wanted.minor, wanted.patch);
+ err = -EREMCHG;
+ goto fail;
+ } else {
+ /* version is unchanged, no need to re-verify it */
+ return 0;
+ }
}
/* illegal */
- if (major > wanted_major) {
+ if (guc_version->major > wanted.major) {
err = -EPROTO;
goto unsupported;
}
/* there's no fallback on major version. */
- if (major != wanted_major) {
+ if (guc_version->major != wanted.major) {
err = -ENOPKG;
goto unsupported;
}
/* check against minimum version supported by us */
- vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor);
- xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY);
- if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) {
+ vf_minimum_guc_version(gt, &wanted);
+ xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY);
+ if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) {
err = -ENOKEY;
goto unsupported;
}
xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n",
- branch, major, minor, patch);
+ guc_version->branch, guc_version->major,
+ guc_version->minor, guc_version->patch);
- guc_version->branch = branch;
- guc_version->major = major;
- guc_version->minor = minor;
- guc_version->patch = patch;
return 0;
unsupported:
xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n",
- branch, major, minor, patch, ERR_PTR(err));
+ guc_version->branch, guc_version->major,
+ guc_version->minor, guc_version->patch,
+ ERR_PTR(err));
fail:
xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n",
- wanted_major, wanted_minor, ERR_PTR(err));
+ wanted.major, wanted.minor, ERR_PTR(err));
/* try again with *any* just to query which version is supported */
- if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY,
- GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY,
- &branch, &major, &minor, &patch))
+ if (!guc_action_match_version_any(guc, &wanted))
xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n",
- branch, major, minor, patch);
+ wanted.branch, wanted.major, wanted.minor, wanted.patch);
return err;
}
@@ -212,6 +262,9 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt)
{
int err;
+ if (!xe_device_uc_enabled(gt_to_xe(gt)))
+ return -ENODEV;
+
err = vf_reset_guc_state(gt);
if (unlikely(err))
return err;
@@ -223,6 +276,67 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt)
return 0;
}
+/**
+ * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions
+ * @gt: the &xe_gt
+ * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version
+ *          (may be NULL, in which case it is not filled)
+ * @found: pointer to the xe_uc_fw_version to be filled with the found version
+ *         (may be NULL, in which case it is not filled)
+ *
+ * This function is for VF use only and it can only be used after successful
+ * version handshake with the GuC.
+ *
+ * Both versions are copied from the values cached in @gt; no request is sent
+ * to the GuC here.
+ */
+void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt,
+ struct xe_uc_fw_version *wanted,
+ struct xe_uc_fw_version *found)
+{
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
+
+ if (wanted)
+ *wanted = gt->sriov.vf.wanted_guc_version;
+
+ if (found)
+ *found = gt->sriov.vf.guc_version;
+}
+
+/*
+ * Send the GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE request to the GuC over MMIO.
+ * The request is built with the minimum HXG request length (header only).
+ * A positive return from xe_guc_mmio_send() is reported as -EPROTO; zero and
+ * negative values are passed through unchanged.
+ */
+static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
+{
+ u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE),
+ };
+ int ret;
+
+ ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+/**
+ * vf_notify_resfix_done - Notify GuC that resource fixups have been applied.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * Logs an error if the notification fails, or a verbose debug message if it
+ * was sent.
+ *
+ * Return: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+static int vf_notify_resfix_done(struct xe_gt *gt)
+{
+ struct xe_guc *guc = &gt->uc.guc;
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ err = guc_action_vf_notify_resfix_done(guc);
+ if (unlikely(err))
+ xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n",
+ ERR_PTR(err));
+ else
+ xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n");
+
+ return err;
+}
+
static int guc_action_query_single_klv(struct xe_guc *guc, u32 key,
u32 *value, u32 value_len)
{
@@ -326,13 +440,17 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt)
static int vf_get_ggtt_info(struct xe_gt *gt)
{
- struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
struct xe_guc *guc = &gt->uc.guc;
- u64 start, size;
+ u64 start, size, ggtt_size;
+ s64 shift;
int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ guard(mutex)(&ggtt->lock);
+
err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start);
if (unlikely(err))
return err;
@@ -341,27 +459,44 @@ static int vf_get_ggtt_info(struct xe_gt *gt)
if (unlikely(err))
return err;
- if (config->ggtt_size && config->ggtt_size != size) {
+ if (!size)
+ return -ENODATA;
+
+ ggtt_size = xe_tile_sriov_vf_ggtt(tile);
+ if (ggtt_size && ggtt_size != size) {
xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n",
- size / SZ_1K, config->ggtt_size / SZ_1K);
+ size / SZ_1K, ggtt_size / SZ_1K);
return -EREMCHG;
}
xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n",
start, start + size - 1, size / SZ_1K);
- config->ggtt_base = start;
- config->ggtt_size = size;
+ shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile);
+ xe_tile_sriov_vf_ggtt_base_store(tile, start);
+ xe_tile_sriov_vf_ggtt_store(tile, size);
- return config->ggtt_size ? 0 : -ENODATA;
+ if (shift && shift != start) {
+ xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n",
+ shift, start);
+ xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift);
+ }
+
+ if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) {
+ WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
+ smp_wmb(); /* Ensure above write visible before wake */
+ wake_up_all(&gt->sriov.vf.migration.wq);
+ }
+
+ return 0;
}
static int vf_get_lmem_info(struct xe_gt *gt)
{
- struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
+ struct xe_tile *tile = gt_to_tile(gt);
struct xe_guc *guc = &gt->uc.guc;
char size_str[10];
- u64 size;
+ u64 size, lmem_size;
int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
@@ -370,18 +505,19 @@ static int vf_get_lmem_info(struct xe_gt *gt)
if (unlikely(err))
return err;
- if (config->lmem_size && config->lmem_size != size) {
+ lmem_size = xe_tile_sriov_vf_lmem(tile);
+ if (lmem_size && lmem_size != size) {
xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n",
- size / SZ_1M, config->lmem_size / SZ_1M);
+ size / SZ_1M, lmem_size / SZ_1M);
return -EREMCHG;
}
string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str));
xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str);
- config->lmem_size = size;
+ xe_tile_sriov_vf_lmem_store(tile, size);
- return config->lmem_size ? 0 : -ENODATA;
+ return size ? 0 : -ENODATA;
}
static int vf_get_submission_cfg(struct xe_gt *gt)
@@ -432,7 +568,9 @@ static void vf_cache_gmdid(struct xe_gt *gt)
* xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO.
* @gt: the &xe_gt
*
- * This function is for VF use only.
+ * This function is for VF use only. This function may shift the GGTT and is
+ * performed under GGTT lock, making this step visible to all GTs that share a
+ * GGTT.
*
* Return: 0 on success or a negative error code on failure.
*/
@@ -445,7 +583,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt)
if (unlikely(err))
return err;
- if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) {
+ if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) {
err = vf_get_lmem_info(gt);
if (unlikely(err))
return err;
@@ -478,125 +616,6 @@ u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt)
return gt->sriov.vf.self_config.num_ctxs;
}
-/**
- * xe_gt_sriov_vf_lmem - VF LMEM configuration.
- * @gt: the &xe_gt
- *
- * This function is for VF use only.
- *
- * Return: size of the LMEM assigned to VF.
- */
-u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt)
-{
- xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
- xe_gt_assert(gt, gt->sriov.vf.self_config.lmem_size);
-
- return gt->sriov.vf.self_config.lmem_size;
-}
-
-static struct xe_ggtt_node *
-vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end)
-{
- struct xe_ggtt_node *node;
- int err;
-
- node = xe_ggtt_node_init(ggtt);
- if (IS_ERR(node))
- return node;
-
- err = xe_ggtt_node_insert_balloon(node, start, end);
- if (err) {
- xe_ggtt_node_fini(node);
- return ERR_PTR(err);
- }
-
- return node;
-}
-
-static int vf_balloon_ggtt(struct xe_gt *gt)
-{
- struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_ggtt *ggtt = tile->mem.ggtt;
- struct xe_device *xe = gt_to_xe(gt);
- u64 start, end;
-
- xe_gt_assert(gt, IS_SRIOV_VF(xe));
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
-
- if (!config->ggtt_size)
- return -ENODATA;
-
- /*
- * VF can only use part of the GGTT as allocated by the PF:
- *
- * WOPCM GUC_GGTT_TOP
- * |<------------ Total GGTT size ------------------>|
- *
- * VF GGTT base -->|<- size ->|
- *
- * +--------------------+----------+-----------------+
- * |////////////////////| block |\\\\\\\\\\\\\\\\\|
- * +--------------------+----------+-----------------+
- *
- * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
- */
-
- start = xe_wopcm_size(xe);
- end = config->ggtt_base;
- if (end != start) {
- tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end);
- if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
- return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);
- }
-
- start = config->ggtt_base + config->ggtt_size;
- end = GUC_GGTT_TOP;
- if (end != start) {
- tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end);
- if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
- xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]);
- return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
- }
- }
-
- return 0;
-}
-
-static void deballoon_ggtt(struct drm_device *drm, void *arg)
-{
- struct xe_tile *tile = arg;
-
- xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
- xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]);
- xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]);
-}
-
-/**
- * xe_gt_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
- * @gt: the &xe_gt
- *
- * This function is for VF use only.
- *
- * Return: 0 on success or a negative error code on failure.
- */
-int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt)
-{
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = tile_to_xe(tile);
- int err;
-
- if (xe_gt_is_media_type(gt))
- return 0;
-
- err = vf_balloon_ggtt(gt);
- if (err)
- return err;
-
- return drmm_add_action_or_reset(&xe->drm, deballoon_ggtt, tile);
-}
-
static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor)
{
u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = {
@@ -629,21 +648,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor)
return 0;
}
-static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor)
+static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor)
{
- xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ xe_assert(xe, IS_SRIOV_VF(xe));
- gt->sriov.vf.pf_version.major = major;
- gt->sriov.vf.pf_version.minor = minor;
+ xe->sriov.vf.pf_version.major = major;
+ xe->sriov.vf.pf_version.minor = minor;
}
-static void vf_disconnect_pf(struct xe_gt *gt)
+static void vf_disconnect_pf(struct xe_device *xe)
{
- vf_connect_pf(gt, 0, 0);
+ vf_connect_pf(xe, 0, 0);
}
static int vf_handshake_with_pf(struct xe_gt *gt)
{
+ struct xe_device *xe = gt_to_xe(gt);
u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR;
u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR;
u32 major = major_wanted, minor = minor_wanted;
@@ -659,13 +679,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt)
}
xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor);
- vf_connect_pf(gt, major, minor);
+ vf_connect_pf(xe, major, minor);
return 0;
failed:
xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n",
major, minor, ERR_PTR(err));
- vf_disconnect_pf(gt);
+ vf_disconnect_pf(xe);
return err;
}
@@ -692,12 +712,75 @@ failed:
return err;
}
-static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
+/**
+ * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs.
+ * @gt: the &xe_gt struct instance
+ *
+ * Calls xe_default_lrc_update_memirq_regs_with_address() for every hardware
+ * engine of @gt.
+ */
+static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt)
{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ for_each_hw_engine(hwe, gt, id)
+ xe_default_lrc_update_memirq_regs_with_address(hwe);
+}
+
+/*
+ * Queue the post-migration recovery worker for @gt.
+ *
+ * Under the migration lock, marks the recovery as queued and in progress,
+ * flags the GGTT as needing fixes, wakes any GuC CT waiters and queues the
+ * recovery worker on the GT ordered workqueue.
+ *
+ * Nothing is done when a recovery is already queued, or when teardown has
+ * been requested by vf_migration_fini().
+ */
+static void vf_start_migration_recovery(struct xe_gt *gt)
+{
+	bool started;
+
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- return major == gt->sriov.vf.pf_version.major &&
- minor <= gt->sriov.vf.pf_version.minor;
+	spin_lock(&gt->sriov.vf.migration.lock);
+
+	/*
+	 * Both conditions must hold: with "||" the worker would still be
+	 * queued during teardown whenever no recovery was currently queued,
+	 * i.e. possibly after vf_migration_fini() cancelled the work.
+	 */
+	if (!gt->sriov.vf.migration.recovery_queued &&
+	    !gt->sriov.vf.migration.recovery_teardown) {
+		gt->sriov.vf.migration.recovery_queued = true;
+		WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true);
+		WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true);
+		smp_wmb(); /* Ensure above writes visible before wake */
+
+		xe_guc_ct_wake_waiters(&gt->uc.guc.ct);
+
+		started = queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
+		xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ?
+				 "scheduled" : "already in progress");
+	}
+
+	spin_unlock(&gt->sriov.vf.migration.lock);
+}
+
+/**
+ * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery,
+ * or just mark that a GuC is ready for it.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * This function shall be called only by VF.
+ *
+ * If VF migration is not supported on this device, an error is logged and
+ * no recovery is started.
+ */
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt));
+
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_gt_sriov_err(gt, "migration not supported\n");
+ return;
+ }
+
+ xe_gt_sriov_info(gt, "ready for recovery after migration\n");
+ vf_start_migration_recovery(gt);
+}
+
+/*
+ * Check whether VF/PF ABI version @major.@minor is covered by the version
+ * negotiated with the PF: same major and a minor not newer than negotiated.
+ */
+static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_sriov_vf_relay_version *pf = &xe->sriov.vf.pf_version;
+
+	xe_gt_assert(gt, IS_SRIOV_VF(xe));
+
+	if (major != pf->major)
+		return false;
+
+	return minor <= pf->minor;
}
static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs)
@@ -885,7 +968,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
struct vf_runtime_reg *rr;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- xe_gt_assert(gt, gt->sriov.vf.pf_version.major);
xe_gt_assert(gt, !reg.vf);
if (reg.addr == GMD_ID.addr) {
@@ -943,20 +1025,25 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
struct xe_device *xe = gt_to_xe(gt);
+ u64 lmem_size;
char buf[10];
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- drm_printf(p, "GGTT range:\t%#llx-%#llx\n",
- config->ggtt_base,
- config->ggtt_base + config->ggtt_size - 1);
+ if (xe_gt_is_main_type(gt)) {
+ u64 ggtt_size = xe_tile_sriov_vf_ggtt(gt_to_tile(gt));
+ u64 ggtt_base = xe_tile_sriov_vf_ggtt_base(gt_to_tile(gt));
- string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf));
- drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf);
+ drm_printf(p, "GGTT range:\t%#llx-%#llx\n",
+ ggtt_base, ggtt_base + ggtt_size - 1);
+ string_get_size(ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "GGTT size:\t%llu (%s)\n", ggtt_size, buf);
- if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) {
- string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf));
- drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf);
+ if (IS_DGFX(xe)) {
+ lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt));
+ string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf);
+ }
}
drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs);
@@ -990,19 +1077,21 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
*/
void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version;
- struct xe_gt_sriov_vf_relay_version *pf_version = &gt->sriov.vf.pf_version;
- u32 branch, major, minor;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
+ struct xe_uc_fw_version *wanted = &gt->sriov.vf.wanted_guc_version;
+ struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version;
+ struct xe_uc_fw_version ver;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
drm_printf(p, "GuC ABI:\n");
- vf_minimum_guc_version(gt, &branch, &major, &minor);
- drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor);
+ vf_minimum_guc_version(gt, &ver);
+ drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor);
- vf_wanted_guc_version(gt, &branch, &major, &minor);
- drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor);
+ drm_printf(p, "\twanted:\t%u.%u.%u.*\n",
+ wanted->branch, wanted->major, wanted->minor);
drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n",
guc_version->branch, guc_version->major,
@@ -1017,3 +1106,272 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "\thandshake:\t%u.%u\n",
pf_version->major, pf_version->minor);
}
+
+/*
+ * First step of the post-migration recovery: quiesce @gt before fixups.
+ *
+ * Clears recovery_queued under the migration lock, then calls
+ * xe_guc_ct_flush_and_stop(), xe_guc_submit_pause() and xe_tlb_inval_reset().
+ *
+ * Return: true if nothing was shut down and the recovery must be re-queued
+ * (media GT while the primary GT recovery is still pending), false otherwise.
+ */
+static bool vf_post_migration_shutdown(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ /*
+ * On platforms where CCS must be restored by the primary GT, the media
+ * GT's VF post-migration recovery must run afterward. Detect this case
+ * and re-queue the media GT's restore work item if necessary.
+ */
+ if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) {
+ struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
+
+ if (xe_gt_sriov_vf_recovery_pending(primary_gt))
+ return true;
+ }
+
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ gt->sriov.vf.migration.recovery_queued = false;
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ xe_guc_ct_flush_and_stop(&gt->uc.guc.ct);
+ xe_guc_submit_pause(&gt->uc.guc);
+ xe_tlb_inval_reset(&gt->tlb_inval);
+
+ return false;
+}
+
+/*
+ * Size of the per-GT scratch buffer used during post-migration recovery:
+ * the larger of xe_lrc_reg_size() and LRC_WA_BB_SIZE.
+ */
+static size_t post_migration_scratch_size(struct xe_device *xe)
+{
+ return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
+}
+
+/*
+ * Apply the post-migration fixups for @gt: re-query the VF configuration,
+ * rebase CCS (main GT only), then rebase the HWSP references of the default
+ * LRCs and of the GuC contexts.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int vf_post_migration_fixups(struct xe_gt *gt)
+{
+	int ret;
+
+	/* xe_gt_sriov_vf_query_config will fixup the GGTT addresses */
+	ret = xe_gt_sriov_vf_query_config(gt);
+	if (ret)
+		return ret;
+
+	if (xe_gt_is_main_type(gt))
+		xe_sriov_vf_ccs_rebase(gt_to_xe(gt));
+
+	xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
+
+	/* the scratch buffer was allocated in xe_gt_sriov_vf_init_early() */
+	return xe_guc_contexts_hwsp_rebase(&gt->uc.guc,
+					   gt->sriov.vf.migration.scratch);
+}
+
+/* Restart the GuC CT and prepare GuC submission for unpause after the fixups. */
+static void vf_post_migration_rearm(struct xe_gt *gt)
+{
+ xe_guc_ct_restart(&gt->uc.guc.ct);
+ xe_guc_submit_unpause_prepare(&gt->uc.guc);
+}
+
+/* Final recovery step: unpause GuC submission. */
+static void vf_post_migration_kickstart(struct xe_gt *gt)
+{
+ xe_guc_submit_unpause(&gt->uc.guc);
+}
+
+/*
+ * Abort a failed recovery: clear the recovery_inprogress and ggtt_need_fixes
+ * flags under the migration lock, wake everyone sleeping on the migration
+ * wait queue, and call xe_guc_submit_pause_abort().
+ */
+static void vf_post_migration_abort(struct xe_gt *gt)
+{
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
+ WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ wake_up_all(&gt->sriov.vf.migration.wq);
+
+ xe_guc_submit_pause_abort(&gt->uc.guc);
+}
+
+/*
+ * Finish the recovery by resuming interrupts and sending RESFIX_DONE to GuC.
+ *
+ * If another recovery was queued in the meantime, nothing is sent and
+ * recovery_inprogress stays set.
+ *
+ * Return: 0 on success, -EAGAIN if the notification was skipped because
+ * another recovery is queued, or a negative error code on failure.
+ */
+static int vf_post_migration_notify_resfix_done(struct xe_gt *gt)
+{
+ bool skip_resfix = false;
+
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ if (gt->sriov.vf.migration.recovery_queued) {
+ skip_resfix = true;
+ xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n");
+ } else {
+ WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
+ }
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ if (skip_resfix)
+ return -EAGAIN;
+
+ /*
+ * Make sure interrupts on the new HW are properly set. The GuC IRQ
+ * must be working at this point, since the recovery has started,
+ * but the rest was not enabled using the procedure from spec.
+ */
+ xe_irq_resume(gt_to_xe(gt));
+
+ return vf_notify_resfix_done(gt);
+}
+
+/*
+ * Run the whole post-migration recovery sequence for @gt:
+ * shutdown -> fixups -> rearm -> notify RESFIX_DONE -> kickstart.
+ *
+ * If the shutdown step asks for a retry, the worker is re-queued and nothing
+ * else is done. On any other failure the recovery is aborted and the device
+ * is declared wedged. -EAGAIN from the notify step is not a failure.
+ */
+static void vf_post_migration_recovery(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+ bool retry;
+
+ xe_gt_sriov_dbg(gt, "migration recovery in progress\n");
+
+ retry = vf_post_migration_shutdown(gt);
+ if (retry)
+ goto queue;
+
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_gt_sriov_err(gt, "migration is not supported\n");
+ err = -ENOTRECOVERABLE;
+ goto fail;
+ }
+
+ err = vf_post_migration_fixups(gt);
+ if (err)
+ goto fail;
+
+ vf_post_migration_rearm(gt);
+
+ err = vf_post_migration_notify_resfix_done(gt);
+ /* -EAGAIN: RESFIX_DONE was skipped because another recovery is queued */
+ if (err && err != -EAGAIN)
+ goto fail;
+
+ vf_post_migration_kickstart(gt);
+
+ xe_gt_sriov_notice(gt, "migration recovery ended\n");
+ return;
+fail:
+ vf_post_migration_abort(gt);
+ xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err));
+ xe_device_declare_wedged(xe);
+ return;
+
+queue:
+ xe_gt_sriov_info(gt, "Re-queuing migration recovery\n");
+ queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
+}
+
+/* Work item callback: run the post-migration recovery of the owning GT. */
+static void migration_worker_func(struct work_struct *w)
+{
+	vf_post_migration_recovery(container_of(w, struct xe_gt,
+						sriov.vf.migration.worker));
+}
+
+/*
+ * Teardown action registered by xe_gt_sriov_vf_init(): set the
+ * recovery_teardown flag under the migration lock (the flag is checked by
+ * vf_start_migration_recovery()), then cancel the recovery worker and wait
+ * for it to finish.
+ */
+static void vf_migration_fini(void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ gt->sriov.vf.migration.recovery_teardown = true;
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ cancel_work_sync(&gt->sriov.vf.migration.worker);
+}
+
+/**
+ * xe_gt_sriov_vf_init_early() - GT VF init early
+ * @gt: the &xe_gt
+ *
+ * When VF migration is supported, allocates the DRM-managed recovery scratch
+ * buffer and initializes the migration lock, worker and wait queue of @gt.
+ * Does nothing otherwise.
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_gt_sriov_vf_init_early(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	void *scratch;
+
+	if (!xe_sriov_vf_migration_supported(xe))
+		return 0;
+
+	scratch = drmm_kmalloc(&xe->drm, post_migration_scratch_size(xe),
+			       GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+
+	gt->sriov.vf.migration.scratch = scratch;
+	spin_lock_init(&gt->sriov.vf.migration.lock);
+	INIT_WORK(&gt->sriov.vf.migration.worker, migration_worker_func);
+	init_waitqueue_head(&gt->sriov.vf.migration.wq);
+
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_vf_init() - GT VF init
+ * @gt: the &xe_gt
+ *
+ * When VF migration is supported, registers vf_migration_fini() as a
+ * device-managed teardown action. Does nothing otherwise.
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_gt_sriov_vf_init(struct xe_gt *gt)
+{
+ if (!xe_sriov_vf_migration_supported(gt_to_xe(gt)))
+ return 0;
+
+ /*
+ * We want to tear down the VF post-migration early during driver
+ * unload; therefore, we add this finalization action later during
+ * driver load.
+ */
+ return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev,
+ vf_migration_fini, gt);
+}
+
+/**
+ * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending
+ * @gt: the &xe_gt
+ *
+ * The return value of this function must be immediately visible upon vCPU
+ * unhalt and must persist until RESFIX_DONE is issued. This guarantee is
+ * currently implemented only for platforms that support memirq. If non-memirq
+ * platforms begin to support VF migration, this function will need to be
+ * updated accordingly.
+ *
+ * Return: True if VF post migration recovery is pending, False otherwise
+ */
+bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt)
+{
+ struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ /* early detection until recovery starts */
+ if (xe_device_uses_memirq(gt_to_xe(gt)) &&
+ xe_memirq_guc_sw_int_0_irq_pending(memirq, &gt->uc.guc))
+ return true;
+
+ /* set when a recovery is queued; cleared before RESFIX_DONE or on abort */
+ return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress);
+}
+
+/*
+ * GGTT addresses are considered valid when no GuC SW_INT_0 memirq is pending
+ * (memirq platforms only) and the ggtt_need_fixes flag is not set.
+ */
+static bool vf_valid_ggtt(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
+	bool irq_pending;
+
+	irq_pending = xe_device_uses_memirq(xe) &&
+		      xe_memirq_guc_sw_int_0_irq_pending(memirq, &gt->uc.guc);
+
+	xe_gt_assert(gt, IS_SRIOV_VF(xe));
+
+	return !irq_pending &&
+	       !READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes);
+}
+
+/**
+ * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses
+ * @gt: the &xe_gt
+ *
+ * Returns immediately if the device is not a VF or VF migration is not
+ * supported. Otherwise sleeps interruptibly on the migration wait queue
+ * until vf_valid_ggtt() is true, for at most 5 seconds; a timeout triggers
+ * a warning.
+ */
+void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt)
+{
+ int ret;
+
+ if (!IS_SRIOV_VF(gt_to_xe(gt)) ||
+ !xe_sriov_vf_migration_supported(gt_to_xe(gt)))
+ return;
+
+ ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq,
+ vf_valid_ggtt(gt),
+ HZ * 5);
+ /* ret == 0: timed out with the condition still false */
+ xe_gt_WARN_ON(gt, !ret);
+}