summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_reset.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_reset.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c189
1 files changed, 138 insertions, 51 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index cc6bd21a3e51..41b5036dc538 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -9,30 +9,28 @@
#include "display/intel_display_reset.h"
#include "display/intel_overlay.h"
-
#include "gem/i915_gem_context.h"
-
#include "gt/intel_gt_regs.h"
-
#include "gt/uc/intel_gsc_fw.h"
+#include "uc/intel_guc.h"
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_print.h"
#include "intel_gt_requests.h"
#include "intel_mchbar_regs.h"
#include "intel_pci_config.h"
#include "intel_reset.h"
-#include "uc/intel_guc.h"
-
#define RESET_MAX_RETRIES 3
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -161,16 +159,16 @@ static int i915_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
int err;
- /* Assert reset for at least 20 usec, and wait for acknowledgement. */
+ /* Assert reset for at least 50 usec, and wait for acknowledgement. */
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
udelay(50);
- err = wait_for_atomic(i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0);
/* Clear the reset request. */
pci_write_config_byte(pdev, I915_GDRST, 0);
udelay(50);
if (!err)
- err = wait_for_atomic(!i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0);
return err;
}
@@ -190,7 +188,7 @@ static int g33_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
- return wait_for_atomic(g4x_reset_complete(pdev), 50);
+ return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
}
static int g4x_do_reset(struct intel_gt *gt,
@@ -207,7 +205,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_MEDIA | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for media reset failed\n");
goto out;
@@ -215,7 +213,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_RENDER | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for render reset failed\n");
goto out;
@@ -592,10 +590,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
700, 0, NULL);
if (ret)
- drm_err(&engine->i915->drm,
- "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
- engine->name, request,
- intel_uncore_read_fw(uncore, reg));
+ gt_err(engine->gt,
+ "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
+ engine->name, request,
+ intel_uncore_read_fw(uncore, reg));
return ret;
}
@@ -705,7 +703,7 @@ static int __reset_guc(struct intel_gt *gt)
static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0))
+ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0))
return false;
if (!__HAS_ENGINE(engine_mask, GSC0))
@@ -763,7 +761,7 @@ wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
HECI_H_GS1_ER_PREP, 0);
}
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
reset_func reset;
@@ -785,9 +783,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
reset_mask = wa_14015076503_start(gt, engine_mask, !retry);
GT_TRACE(gt, "engine_mask=%x\n", reset_mask);
- preempt_disable();
ret = reset(gt, reset_mask, retry);
- preempt_enable();
wa_14015076503_end(gt, reset_mask);
}
@@ -880,8 +876,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
- /* For GuC mode, ensure submission is disabled before stopping ring */
- intel_uc_reset_prepare(&gt->uc);
+ /**
+ * For GuC mode with submission enabled, ensure submission
+ * is disabled before stopping ring.
+ *
+ * For GuC mode with submission disabled, ensure that GuC is not
+ * sanitized, do that after engine reset. reset_prepare()
+ * is followed by engine reset which in this mode requires GuC to
+ * process any CSB FIFO entries generated by the resets.
+ */
+ if (intel_uc_uses_guc_submission(&gt->uc))
+ intel_uc_reset_prepare(&gt->uc);
for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
@@ -978,8 +983,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
awake = reset_prepare(gt);
/* Even if the GPU reset fails, it should still stop the engines */
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- __intel_gt_reset(gt, ALL_ENGINES);
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
+ intel_gt_reset_all_engines(gt);
for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
@@ -1005,6 +1010,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
GT_TRACE(gt, "end\n");
}
+static void set_wedged_work(struct work_struct *w)
+{
+ struct intel_gt *gt = container_of(w, struct intel_gt, wedge);
+ intel_wakeref_t wf;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wf)
+ __intel_gt_set_wedged(gt);
+}
+
void intel_gt_set_wedged(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
@@ -1016,7 +1030,8 @@ void intel_gt_set_wedged(struct intel_gt *gt)
mutex_lock(&gt->reset.mutex);
if (GEM_SHOW_DEBUG()) {
- struct drm_printer p = drm_debug_printer(__func__);
+ struct drm_printer p = drm_dbg_printer(&gt->i915->drm,
+ DRM_UT_DRIVER, NULL);
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1080,7 +1095,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
dma_fence_put(fence);
- /* Restart iteration after droping lock */
+ /* Restart iteration after dropping lock */
spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
@@ -1088,8 +1103,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
+ ok = intel_gt_reset_all_engines(gt) == 0;
if (!ok) {
/*
* Warn CI about the unrecoverable wedged condition.
@@ -1133,10 +1148,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
int err, i;
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
msleep(10 * (i + 1));
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
}
if (err)
return err;
@@ -1159,6 +1174,13 @@ static int resume(struct intel_gt *gt)
return 0;
}
+bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ return INTEL_INFO(i915)->gpu_reset_clobbers_display;
+}
+
/**
* intel_gt_reset - reset chip after a hang
* @gt: #intel_gt to reset
@@ -1180,6 +1202,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t stalled_mask,
const char *reason)
{
+ struct intel_display *display = gt->i915->display;
intel_engine_mask_t awake;
int ret;
@@ -1201,33 +1224,35 @@ void intel_gt_reset(struct intel_gt *gt,
goto unlock;
if (reason)
- drm_notice(&gt->i915->drm,
- "Resetting chip for %s\n", reason);
+ gt_notice(gt, "Resetting chip for %s\n", reason);
atomic_inc(&gt->i915->gpu_error.reset_count);
awake = reset_prepare(gt);
if (!intel_has_gpu_reset(gt)) {
if (gt->i915->params.reset)
- drm_err(&gt->i915->drm, "GPU reset not supported\n");
+ gt_err(gt, "GPU reset not supported\n");
else
- drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
+ gt_dbg(gt, "GPU reset disabled\n");
goto error;
}
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_disable_interrupts(gt->i915);
+ if (intel_gt_gpu_reset_clobbers_display(gt))
+ intel_irq_suspend(gt->i915);
if (do_reset(gt, stalled_mask)) {
- drm_err(&gt->i915->drm, "Failed to reset chip\n");
+ gt_err(gt, "Failed to reset chip\n");
goto taint;
}
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_enable_interrupts(gt->i915);
+ if (intel_gt_gpu_reset_clobbers_display(gt))
+ intel_irq_resume(gt->i915);
- intel_overlay_reset(gt->i915);
+ intel_overlay_reset(display);
+ /* sanitize uC after engine reset */
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ intel_uc_reset_prepare(&gt->uc);
/*
* Next we need to restore the context, but we don't use those
* yet either...
@@ -1238,9 +1263,7 @@ void intel_gt_reset(struct intel_gt *gt,
*/
ret = intel_gt_init_hw(gt);
if (ret) {
- drm_err(&gt->i915->drm,
- "Failed to initialise HW following reset (%d)\n",
- ret);
+ gt_err(gt, "Failed to initialise HW following reset (%d)\n", ret);
goto taint;
}
@@ -1273,7 +1296,30 @@ error:
goto finish;
}
-static int intel_gt_reset_engine(struct intel_engine_cs *engine)
+/**
+ * intel_gt_reset_all_engines() - Reset all engines in the given gt.
+ * @gt: the GT to reset all engines for.
+ *
+ * This function resets all engines within the given gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_all_engines(struct intel_gt *gt)
+{
+ return __intel_gt_reset(gt, ALL_ENGINES);
+}
+
+/**
+ * intel_gt_reset_engine() - Reset a specific engine within a gt.
+ * @engine: engine to be reset.
+ *
+ * This function resets the specified engine within a gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
return __intel_gt_reset(engine->gt, engine->mask);
}
@@ -1297,7 +1343,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
if (msg)
drm_notice(&engine->i915->drm,
"Resetting %s for %s\n", engine->name, msg);
- atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
+ i915_increase_reset_engine_count(&engine->i915->gpu_error, engine);
ret = intel_gt_reset_engine(engine);
if (ret) {
@@ -1351,6 +1397,11 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
return err;
}
+static void display_reset_modeset_stuck(void *gt)
+{
+ intel_gt_set_wedged(gt);
+}
+
static void intel_gt_reset_global(struct intel_gt *gt,
u32 engine_mask,
const char *reason)
@@ -1368,15 +1419,34 @@ static void intel_gt_reset_global(struct intel_gt *gt,
/* Use a watchdog to ensure that our reset completes */
intel_wedge_on_timeout(&w, gt, 60 * HZ) {
- intel_display_reset_prepare(gt->i915);
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = i915->display;
+ bool need_display_reset;
+ bool reset_display;
+
+ need_display_reset = intel_gt_gpu_reset_clobbers_display(gt) &&
+ intel_has_gpu_reset(gt);
+
+ reset_display = intel_display_reset_test(display) ||
+ need_display_reset;
+
+ if (reset_display)
+ reset_display = intel_display_reset_prepare(display,
+ display_reset_modeset_stuck,
+ gt);
intel_gt_reset(gt, engine_mask, reason);
- intel_display_reset_finish(gt->i915);
+ if (reset_display)
+ intel_display_reset_finish(display, !need_display_reset);
}
if (!test_bit(I915_WEDGED, &gt->reset.flags))
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+ else
+ drm_dev_wedged_event(&gt->i915->drm,
+ DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET,
+ NULL);
}
/**
@@ -1437,7 +1507,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
local_bh_disable();
for_each_engine_masked(engine, gt, engine_mask, tmp) {
- BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+ BUILD_BUG_ON(I915_RESET_BACKOFF >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
continue;
@@ -1582,6 +1652,7 @@ void intel_gt_init_reset(struct intel_gt *gt)
init_waitqueue_head(&gt->reset.queue);
mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu);
+ INIT_WORK(&gt->wedge, set_wedged_work);
/*
* While undesirable to wait inside the shrinker, complain anyway.
@@ -1607,10 +1678,8 @@ static void intel_wedge_me(struct work_struct *work)
{
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- drm_err(&w->gt->i915->drm,
- "%s timed out, cancelling all in-flight rendering.\n",
- w->name);
- intel_gt_set_wedged(w->gt);
+ gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
+ set_wedged_work(&w->gt->wedge);
}
void __intel_init_wedge(struct intel_wedge_me *w,
@@ -1632,6 +1701,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
w->gt = NULL;
}
+/*
+ * Wa_22011802037 requires that we (or the GuC) ensure that no command
+ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated.
+ */
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) < 11)
+ return false;
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
+ return true;
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ return false;
+
+ return true;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"