drm/i915: Tighten reset_counter for reset status

In the reset_counter, we use two bits to track a GPU hang and reset. The low bit is a "reset-in-progress" flag that we set to signal when we need to break waiters in order for the recovery task to grab the mutex. As soon as the recovery task has the mutex, we can clear that flag (which we do by incrementing the reset_counter thereby incrementing the gobal reset epoch). By clearing that flag when the recovery task holds the struct_mutex, we can forgo a second flag that simply tells GEM to ignore the "reset-in-progress" flag. The second flag we store in the reset_counter is whether the reset failed and we consider the GPU terminally wedged. Whilst this flag is set, all access to the GPU (at least through GEM rather than direct mmio access) is verboten. PS: Fun is in store, as in the future we want to move from a global reset epoch to a per-engine reset engine with request recovery. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1460565315-7748-6-git-send-email-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-04-13 17:35:05 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-04-14 10:45:40 +0100
commit: d98c52cf4fa2bb7116a89f1132fc773b1cfa6436 (patch)
tree: 94e588b0ef5dcdbc41748fc37a49de31b4f8641d /drivers/gpu/drm/i915/i915_irq.c
parent: 7f1847ebf48b2e5753691cc9e2a404c260383933 (diff)
1 files changed, 2 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c2269c103e30..be78f5229114 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2483,7 +2483,6 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 static void i915_reset_and_wakeup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
@@ -2501,7 +2500,7 @@ static void i915_reset_and_wakeup(struct drm_device *dev)
 	 * the reset in-progress bit is only ever set by code outside of this
 	 * work we don't need to worry about any other races.
 	 */
-	if (i915_reset_in_progress_or_wedged(error) && !i915_terminally_wedged(error)) {
+	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
 		DRM_DEBUG_DRIVER("resetting chip\n");
 		kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE,
 				   reset_event);
@@ -2529,25 +2528,9 @@ static void i915_reset_and_wakeup(struct drm_device *dev)
 
 		intel_runtime_pm_put(dev_priv);
 
-		if (ret == 0) {
-			/*
-			 * After all the gem state is reset, increment the reset
-			 * counter and wake up everyone waiting for the reset to
-			 * complete.
-			 *
-			 * Since unlock operations are a one-sided barrier only,
-			 * we need to insert a barrier here to order any seqno
-			 * updates before
-			 * the counter increment.
-			 */
-			smp_mb__before_atomic();
-			atomic_inc(&dev_priv->gpu_error.reset_counter);
-
+		if (ret == 0)
 			kobject_uevent_env(&dev->primary->kdev->kobj,
 					   KOBJ_CHANGE, reset_done_event);
-		} else {
-			atomic_or(I915_WEDGED, &error->reset_counter);
-		}
 
 		/*
 		 * Note: The wake_up also serves as a memory barrier so that
author	Chris Wilson <chris@chris-wilson.co.uk>	2016-04-13 17:35:05 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2016-04-14 10:45:40 +0100
commit	d98c52cf4fa2bb7116a89f1132fc773b1cfa6436 (patch)
tree	94e588b0ef5dcdbc41748fc37a49de31b4f8641d /drivers/gpu/drm/i915/i915_irq.c
parent	7f1847ebf48b2e5753691cc9e2a404c260383933 (diff)