summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_sriov_vf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_sriov_vf.c')
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c191
1 files changed, 179 insertions, 12 deletions
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 26e243c28994..cdd9f8e78b2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -3,6 +3,7 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>
#include "xe_assert.h"
@@ -10,11 +11,16 @@
#include "xe_gt.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
#include "xe_guc_ct.h"
+#include "xe_guc_submit.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_tile_sriov_vf.h"
/**
@@ -124,16 +130,66 @@
* | | |
*/
-static bool vf_migration_supported(struct xe_device *xe)
+/**
+ * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is
+ * supported or not.
+ * @xe: the &xe_device to check
+ *
+ * Returns: true if VF migration is supported, false otherwise.
+ */
+bool xe_sriov_vf_migration_supported(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return xe->sriov.vf.migration.enabled;
+}
+
+static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list va_args;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ va_start(va_args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &va_args;
+ xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
+ va_end(va_args);
+
+ xe->sriov.vf.migration.enabled = false;
+}
+
+static void migration_worker_func(struct work_struct *w);
+
+static void vf_migration_init_early(struct xe_device *xe)
{
/*
* TODO: Add conditions to allow specific platforms, when they're
* supported at production quality.
*/
- return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
-}
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ return vf_disable_migration(xe,
+ "experimental feature not available on production builds");
+
+ if (GRAPHICS_VER(xe) < 20)
+ return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found",
+ GRAPHICS_VER(xe));
+
+ if (!IS_DGFX(xe)) {
+ struct xe_uc_fw_version guc_version;
+
+ xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version);
+ if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0))
+ return vf_disable_migration(xe,
+ "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
+ guc_version.major, guc_version.minor);
+ }
-static void migration_worker_func(struct work_struct *w);
+ INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+
+ xe->sriov.vf.migration.enabled = true;
+ xe_sriov_dbg(xe, "migration support enabled\n");
+}
/**
* xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
@@ -141,10 +197,57 @@ static void migration_worker_func(struct work_struct *w);
*/
void xe_sriov_vf_init_early(struct xe_device *xe)
{
- INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+ vf_migration_init_early(xe);
+}
+
+/**
+ * vf_post_migration_shutdown - Stop the driver activities after VF migration.
+ * @xe: the &xe_device struct instance
+ *
+ * After this VM is migrated and assigned to a new VF, it is running on a new
+ * hardware, and therefore many hardware-dependent states and related structures
+ * require fixups. Without fixups, the hardware cannot do any work, and therefore
+ * all GPU pipelines are stalled.
+ * Stop some of kernel activities to make the fixup process faster.
+ */
+static void vf_post_migration_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret = 0;
+
+ for_each_gt(gt, xe, id) {
+ xe_guc_submit_pause(&gt->uc.guc);
+ ret |= xe_guc_submit_reset_block(&gt->uc.guc);
+ }
+
+ if (ret)
+ drm_info(&xe->drm, "migration recovery encountered ongoing reset\n");
+}
+
+/**
+ * vf_post_migration_kickstart - Re-start the driver activities under new hardware.
+ * @xe: the &xe_device struct instance
+ *
+ * After we have finished with all post-migration fixups, restart the driver
+ * activities to continue feeding the GPU with workloads.
+ */
+static void vf_post_migration_kickstart(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
- if (!vf_migration_supported(xe))
- xe_sriov_info(xe, "migration not supported by this module version\n");
+ /*
+ * Make sure interrupts on the new HW are properly set. The GuC IRQ
+ * must be working at this point, since the recovery did started,
+ * but the rest was not enabled using the procedure from spec.
+ */
+ xe_irq_resume(xe);
+
+ for_each_gt(gt, xe, id) {
+ xe_guc_submit_reset_unblock(&gt->uc.guc);
+ xe_guc_submit_unpause(&gt->uc.guc);
+ }
}
static bool gt_vf_post_migration_needed(struct xe_gt *gt)
@@ -192,6 +295,11 @@ static int vf_get_next_migrated_gt_id(struct xe_device *xe)
return -1;
}
+static size_t post_migration_scratch_size(struct xe_device *xe)
+{
+ return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
+}
+
/**
* Perform post-migration fixups on a single GT.
*
@@ -208,19 +316,31 @@ static int vf_get_next_migrated_gt_id(struct xe_device *xe)
static int gt_vf_post_migration_fixups(struct xe_gt *gt)
{
s64 shift;
+ void *buf;
int err;
+ buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
err = xe_gt_sriov_vf_query_config(gt);
if (err)
- return err;
+ goto out;
shift = xe_gt_sriov_vf_ggtt_shift(gt);
if (shift) {
xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
- /* FIXME: add the recovery steps */
+ xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
+ err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
+ if (err)
+ goto out;
+ xe_guc_jobs_ring_rebase(&gt->uc.guc);
xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
}
- return 0;
+
+out:
+ kfree(buf);
+ return err;
}
static void vf_post_migration_recovery(struct xe_device *xe)
@@ -230,9 +350,10 @@ static void vf_post_migration_recovery(struct xe_device *xe)
drm_dbg(&xe->drm, "migration recovery in progress\n");
xe_pm_runtime_get(xe);
+ vf_post_migration_shutdown(xe);
- if (!vf_migration_supported(xe)) {
- xe_sriov_err(xe, "migration not supported by this module version\n");
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_sriov_err(xe, "migration is not supported\n");
err = -ENOTRECOVERABLE;
goto fail;
}
@@ -247,6 +368,7 @@ static void vf_post_migration_recovery(struct xe_device *xe)
set_bit(id, &fixed_gts);
}
+ vf_post_migration_kickstart(xe);
err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
if (err)
goto fail;
@@ -306,3 +428,48 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
drm_info(&xe->drm, "VF migration recovery %s\n", started ?
"scheduled" : "already in progress");
}
+
+/**
+ * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions.
+ * @xe: the &xe_device to initialize
+ *
+ * This function initializes code for CCS migration.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vf_init_late(struct xe_device *xe)
+{
+ int err = 0;
+
+ if (xe_sriov_vf_migration_supported(xe))
+ err = xe_sriov_vf_ccs_init(xe);
+
+ return err;
+}
+
+static int sa_info_vf_ccs(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct xe_device *xe = to_xe_device(node->minor->dev);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_sriov_vf_ccs_print(xe, &p);
+ return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+ { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs },
+};
+
+/**
+ * xe_sriov_vf_debugfs_register - Register VF debugfs attributes.
+ * @xe: the &xe_device
+ * @root: the root &dentry
+ *
+ * Prepare debugfs attributes exposed by the VF.
+ */
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root)
+{
+ drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
+ root, xe->drm.primary);
+}