Diffstat (limited to 'drivers/gpu/drm/xe/xe_sriov_vf.c')
-rw-r--r--   drivers/gpu/drm/xe/xe_sriov_vf.c | 163
1 file changed, 104 insertions(+), 59 deletions(-)
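
The patch below reworks the VF post-migration recovery path from an all-GTs-at-once model to a per-GT one: GTs that still need recovery are tracked as bits in xe->sriov.vf.migration.gt_flags, the worker claims them one at a time with test_and_clear_bit(), and a RESFIX-done notification is sent only for GTs whose bit has not been set again while the fixups ran. As a reading aid, here is a minimal standalone C sketch of that drain-and-notify pattern; it stands in for the kernel's bit helpers with C11 atomics, and all names in it (pending_gts, get_next_pending_gt, and so on) are illustrative rather than taken from the driver:

#include <stdio.h>
#include <stdatomic.h>

#define MAX_GTS (sizeof(unsigned long) * 8)

/* Bitmap of GTs with a pending recovery; set by the migration event side. */
static _Atomic unsigned long pending_gts;

/* Atomically claim one pending GT id, or return -1 if none are left. */
static int get_next_pending_gt(void)
{
	for (unsigned int id = 0; id < MAX_GTS; id++) {
		unsigned long bit = 1UL << id;

		/* test_and_clear_bit() equivalent: only one caller wins the bit */
		if (atomic_fetch_and(&pending_gts, ~bit) & bit)
			return (int)id;
	}
	return -1;
}

int main(void)
{
	unsigned long fixed = 0;
	int id;

	/* Migration event side: mark GT0 and GT1 as needing recovery. */
	atomic_fetch_or(&pending_gts, 1UL << 0);
	atomic_fetch_or(&pending_gts, 1UL << 1);

	/* Worker: drain the bitmap, running per-GT fixups for each claim. */
	while ((id = get_next_pending_gt()) >= 0) {
		printf("GT%d: fixups applied\n", id);
		fixed |= 1UL << id;
	}

	/* Notify only GTs that were fixed and not re-flagged meanwhile. */
	for (id = 0; id < (int)MAX_GTS; id++) {
		if (!(fixed & (1UL << id)))
			continue;
		if (atomic_load(&pending_gts) & (1UL << id))
			continue;	/* new recovery request arrived; stay paused */
		printf("GT%d: RESFIX done sent\n", id);
	}
	return 0;
}

Claiming bits with an atomic test-and-clear means a migration event that arrives mid-recovery simply re-marks the GT; the skipped notification keeps that GuC paused until the next worker pass.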
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index c1275e64aa9c..26e243c28994 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -7,12 +7,15 @@
 #include "xe_assert.h"
 #include "xe_device.h"
+#include "xe_gt.h"
 #include "xe_gt_sriov_printk.h"
 #include "xe_gt_sriov_vf.h"
+#include "xe_guc_ct.h"
 #include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_sriov_printk.h"
 #include "xe_sriov_vf.h"
+#include "xe_tile_sriov_vf.h"
 
 /**
  * DOC: VF restore procedure in PF KMD and VF KMD
  *
@@ -121,6 +124,15 @@
  *      |                               |                            |
  */
 
+static bool vf_migration_supported(struct xe_device *xe)
+{
+	/*
+	 * TODO: Add conditions to allow specific platforms, when they're
+	 * supported at production quality.
+	 */
+	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
+}
+
 static void migration_worker_func(struct work_struct *w);
 
 /**
@@ -130,86 +142,118 @@ static void migration_worker_func(struct work_struct *w);
 void xe_sriov_vf_init_early(struct xe_device *xe)
 {
 	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+
+	if (!vf_migration_supported(xe))
+		xe_sriov_info(xe, "migration not supported by this module version\n");
 }
 
-/**
- * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
+static bool gt_vf_post_migration_needed(struct xe_gt *gt)
+{
+	return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
+}
+
+/*
+ * Notify GuCs marked in flags that resource fixups have been applied.
  * @xe: the &xe_device struct instance
- *
- * After migration, we need to re-query all VF configuration to make sure
- * they match previous provisioning. Note that most of VF provisioning
- * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
- *
- * Returns: 0 if the operation completed successfully, or a negative error
- * code otherwise.
+ * @gt_flags: flags marking to which GTs the notification shall be sent
  */
-static int vf_post_migration_requery_guc(struct xe_device *xe)
+static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags)
 {
 	struct xe_gt *gt;
 	unsigned int id;
-	int err, ret = 0;
+	int err = 0;
 
 	for_each_gt(gt, xe, id) {
-		err = xe_gt_sriov_vf_query_config(gt);
-		ret = ret ?: err;
+		if (!test_bit(id, &gt_flags))
+			continue;
+		/* skip asking GuC for RESFIX exit if a new recovery request arrived */
+		if (gt_vf_post_migration_needed(gt))
+			continue;
+		err = xe_gt_sriov_vf_notify_resfix_done(gt);
+		if (err)
+			break;
+		clear_bit(id, &gt_flags);
 	}
 
-	return ret;
-}
-
-/*
- * vf_post_migration_imminent - Check if post-restore recovery is coming.
- * @xe: the &xe_device struct instance
- *
- * Return: True if migration recovery worker will soon be running. Any worker currently
- * executing does not affect the result.
- */
-static bool vf_post_migration_imminent(struct xe_device *xe)
-{
-	return xe->sriov.vf.migration.gt_flags != 0 ||
-	       work_pending(&xe->sriov.vf.migration.worker);
+	if (gt_flags && !err)
+		drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
+	return err;
 }
 
-/*
- * Notify all GuCs about resource fixups apply finished.
- */
-static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
+static int vf_get_next_migrated_gt_id(struct xe_device *xe)
 {
 	struct xe_gt *gt;
 	unsigned int id;
 
 	for_each_gt(gt, xe, id) {
-		if (vf_post_migration_imminent(xe))
-			goto skip;
-		xe_gt_sriov_vf_notify_resfix_done(gt);
+		if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags))
+			return id;
 	}
-	return;
+	return -1;
+}
 
-skip:
-	drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
+/**
+ * gt_vf_post_migration_fixups - Perform post-migration fixups on a single GT.
+ *
+ * After migration, GuC needs to be re-queried for VF configuration to check
+ * if it matches previous provisioning. Most of VF provisioning shall be the
+ * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT
+ * range has changed, we have to perform fixups - shift all GGTT references
+ * used anywhere within the driver. After the fixups in this function succeed,
+ * it is allowed to ask the GuC bound to this GT to continue normal operation.
+ *
+ * Returns: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+static int gt_vf_post_migration_fixups(struct xe_gt *gt)
+{
+	s64 shift;
+	int err;
+
+	err = xe_gt_sriov_vf_query_config(gt);
+	if (err)
+		return err;
+
+	shift = xe_gt_sriov_vf_ggtt_shift(gt);
+	if (shift) {
+		xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
+		/* FIXME: add the recovery steps */
+		xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
+	}
+	return 0;
 }
 
 static void vf_post_migration_recovery(struct xe_device *xe)
 {
-	int err;
+	unsigned long fixed_gts = 0;
+	int id, err;
 
 	drm_dbg(&xe->drm, "migration recovery in progress\n");
 	xe_pm_runtime_get(xe);
-	err = vf_post_migration_requery_guc(xe);
-	if (vf_post_migration_imminent(xe))
-		goto defer;
-	if (unlikely(err))
+
+	if (!vf_migration_supported(xe)) {
+		xe_sriov_err(xe, "migration not supported by this module version\n");
+		err = -ENOTRECOVERABLE;
+		goto fail;
+	}
+
+	while (id = vf_get_next_migrated_gt_id(xe), id >= 0) {
+		struct xe_gt *gt = xe_device_get_gt(xe, id);
+
+		err = gt_vf_post_migration_fixups(gt);
+		if (err)
+			goto fail;
+
+		set_bit(id, &fixed_gts);
+	}
+
+	err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
+	if (err)
 		goto fail;
 
-	/* FIXME: add the recovery steps */
-	vf_post_migration_notify_resfix_done(xe);
 	xe_pm_runtime_put(xe);
 	drm_notice(&xe->drm, "migration recovery ended\n");
 	return;
-defer:
-	xe_pm_runtime_put(xe);
-	drm_dbg(&xe->drm, "migration recovery deferred\n");
-	return;
 fail:
 	xe_pm_runtime_put(xe);
 	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
@@ -224,18 +268,23 @@ static void migration_worker_func(struct work_struct *w)
 	vf_post_migration_recovery(xe);
 }
 
-static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+/*
+ * Check if post-restore recovery is coming on any of the GTs.
+ * @xe: the &xe_device struct instance
+ *
+ * Return: True if migration recovery worker will soon be running. Any worker currently
+ * executing does not affect the result.
+ */
+static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe)
 {
 	struct xe_gt *gt;
 	unsigned int id;
 
 	for_each_gt(gt, xe, id) {
-		if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
-			xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
-			return false;
-		}
+		if (test_bit(id, &xe->sriov.vf.migration.gt_flags))
+			return true;
 	}
 
-	return true;
+	return false;
 }
 
 /**
@@ -250,13 +299,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
 
 	xe_assert(xe, IS_SRIOV_VF(xe));
 
-	if (!vf_ready_to_recovery_on_all_gts(xe))
+	if (!vf_ready_to_recovery_on_any_gts(xe))
 		return;
 
-	WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
-	/* Ensure other threads see that no flags are set now. */
-	smp_mb();
-
 	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
 	drm_info(&xe->drm, "VF migration recovery %s\n",
 		 started ? "scheduled" : "already in progress");
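
The GGTT part of gt_vf_post_migration_fixups() above reduces to the arithmetic its comment describes: take the signed distance between the GGTT base the VF was provisioned with before the migration and the one GuC reports afterwards, then slide every driver-side GGTT reference by that amount. A simplified standalone sketch under that assumption follows; struct ggtt_node, ggtt_shift() and fixup_ggtt_nodes() are hypothetical stand-ins, not the driver's actual structures and helpers:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a driver-side GGTT allocation. */
struct ggtt_node {
	uint64_t start;	/* GGTT offset of the allocation */
	uint64_t size;
};

/*
 * Signed distance between the GGTT base provisioned before the
 * migration and the one reported by GuC afterwards.
 */
static int64_t ggtt_shift(uint64_t old_base, uint64_t new_base)
{
	return (int64_t)(new_base - old_base);
}

/* Slide every node by the shift so stored offsets match the new range. */
static void fixup_ggtt_nodes(struct ggtt_node *nodes, size_t count, int64_t shift)
{
	for (size_t i = 0; i < count; i++)
		nodes[i].start += shift;
}

int main(void)
{
	struct ggtt_node nodes[] = {
		{ .start = 0x00100000, .size = 0x1000 },
		{ .start = 0x00200000, .size = 0x2000 },
	};
	/* e.g. the VF moved from a GGTT range at 1 MiB to one at 16 MiB */
	int64_t shift = ggtt_shift(0x00100000, 0x01000000);

	fixup_ggtt_nodes(nodes, 2, shift);
	for (size_t i = 0; i < 2; i++)
		printf("node %zu now at 0x%08llx\n", i,
		       (unsigned long long)nodes[i].start);
	return 0;
}

In the driver, the same shift has to be applied consistently everywhere a GGTT address was captured, which is why the patch also fixes up GGTT references already sitting in queued GuC CT messages via xe_guc_ct_fixup_messages_with_ggtt().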