summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c203
1 files changed, 125 insertions, 78 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 89725c9efc25..9d73d2216adc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -332,7 +332,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
const struct i915_error_state_file_priv *error_priv)
{
struct drm_device *dev = error_priv->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_error_state *error = error_priv->error;
struct drm_i915_error_object *obj;
int i, j, offset, elt;
@@ -411,7 +411,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
}
- if (INTEL_INFO(dev)->gen == 7)
+ if (IS_GEN7(dev))
err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
for (i = 0; i < ARRAY_SIZE(error->ring); i++)
@@ -463,6 +463,18 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
}
}
+ if (error->ring[i].num_waiters) {
+ err_printf(m, "%s --- %d waiters\n",
+ dev_priv->engine[i].name,
+ error->ring[i].num_waiters);
+ for (j = 0; j < error->ring[i].num_waiters; j++) {
+ err_printf(m, " seqno 0x%08x for %s [%d]\n",
+ error->ring[i].waiters[j].seqno,
+ error->ring[i].waiters[j].comm,
+ error->ring[i].waiters[j].pid);
+ }
+ }
+
if ((obj = error->ring[i].ringbuffer)) {
err_printf(m, "%s --- ringbuffer = 0x%08x\n",
dev_priv->engine[i].name,
@@ -488,7 +500,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
hws_page[elt+1],
hws_page[elt+2],
hws_page[elt+3]);
- offset += 16;
+ offset += 16;
}
}
@@ -605,8 +617,9 @@ static void i915_error_state_free(struct kref *error_ref)
i915_error_object_free(error->ring[i].ringbuffer);
i915_error_object_free(error->ring[i].hws_page);
i915_error_object_free(error->ring[i].ctx);
- kfree(error->ring[i].requests);
i915_error_object_free(error->ring[i].wa_ctx);
+ kfree(error->ring[i].requests);
+ kfree(error->ring[i].waiters);
}
i915_error_object_free(error->semaphore_obj);
@@ -824,19 +837,18 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
return error_code;
}
-static void i915_gem_record_fences(struct drm_device *dev,
+static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
int i;
- if (IS_GEN3(dev) || IS_GEN2(dev)) {
+ if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) {
for (i = 0; i < dev_priv->num_fence_regs; i++)
error->fence[i] = I915_READ(FENCE_REG(i));
- } else if (IS_GEN5(dev) || IS_GEN4(dev)) {
+ } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) {
for (i = 0; i < dev_priv->num_fence_regs; i++)
error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
- } else if (INTEL_INFO(dev)->gen >= 6) {
+ } else if (INTEL_GEN(dev_priv) >= 6) {
for (i = 0; i < dev_priv->num_fence_regs; i++)
error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
}
@@ -851,7 +863,7 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
struct intel_engine_cs *to;
enum intel_engine_id id;
- if (!i915_semaphore_is_enabled(dev_priv->dev))
+ if (!i915_semaphore_is_enabled(dev_priv))
return;
if (!error->semaphore_obj)
@@ -893,31 +905,71 @@ static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv,
}
}
-static void i915_record_ring_state(struct drm_device *dev,
+static void engine_record_waiters(struct intel_engine_cs *engine,
+ struct drm_i915_error_ring *ering)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ struct drm_i915_error_waiter *waiter;
+ struct rb_node *rb;
+ int count;
+
+ ering->num_waiters = 0;
+ ering->waiters = NULL;
+
+ spin_lock(&b->lock);
+ count = 0;
+ for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
+ count++;
+ spin_unlock(&b->lock);
+
+ waiter = NULL;
+ if (count)
+ waiter = kmalloc_array(count,
+ sizeof(struct drm_i915_error_waiter),
+ GFP_ATOMIC);
+ if (!waiter)
+ return;
+
+ ering->waiters = waiter;
+
+ spin_lock(&b->lock);
+ for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
+ struct intel_wait *w = container_of(rb, typeof(*w), node);
+
+ strcpy(waiter->comm, w->tsk->comm);
+ waiter->pid = w->tsk->pid;
+ waiter->seqno = w->seqno;
+ waiter++;
+
+ if (++ering->num_waiters == count)
+ break;
+ }
+ spin_unlock(&b->lock);
+}
+
+static void i915_record_ring_state(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error,
struct intel_engine_cs *engine,
struct drm_i915_error_ring *ering)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
-
- if (INTEL_INFO(dev)->gen >= 6) {
+ if (INTEL_GEN(dev_priv) >= 6) {
ering->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
ering->fault_reg = I915_READ(RING_FAULT_REG(engine));
- if (INTEL_INFO(dev)->gen >= 8)
+ if (INTEL_GEN(dev_priv) >= 8)
gen8_record_semaphore_state(dev_priv, error, engine,
ering);
else
gen6_record_semaphore_state(dev_priv, engine, ering);
}
- if (INTEL_INFO(dev)->gen >= 4) {
+ if (INTEL_GEN(dev_priv) >= 4) {
ering->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base));
ering->ipeir = I915_READ(RING_IPEIR(engine->mmio_base));
ering->ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
ering->instdone = I915_READ(RING_INSTDONE(engine->mmio_base));
ering->instps = I915_READ(RING_INSTPS(engine->mmio_base));
ering->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
- if (INTEL_INFO(dev)->gen >= 8) {
+ if (INTEL_GEN(dev_priv) >= 8) {
ering->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32;
ering->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32;
}
@@ -929,20 +981,20 @@ static void i915_record_ring_state(struct drm_device *dev,
ering->instdone = I915_READ(GEN2_INSTDONE);
}
- ering->waiting = waitqueue_active(&engine->irq_queue);
+ ering->waiting = intel_engine_has_waiter(engine);
ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
ering->acthd = intel_ring_get_active_head(engine);
- ering->seqno = engine->get_seqno(engine);
+ ering->seqno = intel_engine_get_seqno(engine);
ering->last_seqno = engine->last_submitted_seqno;
ering->start = I915_READ_START(engine);
ering->head = I915_READ_HEAD(engine);
ering->tail = I915_READ_TAIL(engine);
ering->ctl = I915_READ_CTL(engine);
- if (I915_NEED_GFX_HWS(dev)) {
+ if (I915_NEED_GFX_HWS(dev_priv)) {
i915_reg_t mmio;
- if (IS_GEN7(dev)) {
+ if (IS_GEN7(dev_priv)) {
switch (engine->id) {
default:
case RCS:
@@ -958,7 +1010,7 @@ static void i915_record_ring_state(struct drm_device *dev,
mmio = VEBOX_HWS_PGA_GEN7;
break;
}
- } else if (IS_GEN6(engine->dev)) {
+ } else if (IS_GEN6(engine->i915)) {
mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
/* XXX: gen8 returns to sanity */
@@ -971,18 +1023,18 @@ static void i915_record_ring_state(struct drm_device *dev,
ering->hangcheck_score = engine->hangcheck.score;
ering->hangcheck_action = engine->hangcheck.action;
- if (USES_PPGTT(dev)) {
+ if (USES_PPGTT(dev_priv)) {
int i;
ering->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));
- if (IS_GEN6(dev))
+ if (IS_GEN6(dev_priv))
ering->vm_info.pp_dir_base =
I915_READ(RING_PP_DIR_BASE_READ(engine));
- else if (IS_GEN7(dev))
+ else if (IS_GEN7(dev_priv))
ering->vm_info.pp_dir_base =
I915_READ(RING_PP_DIR_BASE(engine));
- else if (INTEL_INFO(dev)->gen >= 8)
+ else if (INTEL_GEN(dev_priv) >= 8)
for (i = 0; i < 4; i++) {
ering->vm_info.pdp[i] =
I915_READ(GEN8_RING_PDP_UDW(engine, i));
@@ -998,7 +1050,7 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
struct drm_i915_error_state *error,
struct drm_i915_error_ring *ering)
{
- struct drm_i915_private *dev_priv = engine->dev->dev_private;
+ struct drm_i915_private *dev_priv = engine->i915;
struct drm_i915_gem_object *obj;
/* Currently render ring is the only HW context user */
@@ -1016,34 +1068,33 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
}
}
-static void i915_gem_record_rings(struct drm_device *dev,
+static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_i915_gem_request *request;
int i, count;
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = &dev_priv->engine[i];
- struct intel_ringbuffer *rbuf;
error->ring[i].pid = -1;
- if (engine->dev == NULL)
+ if (!intel_engine_initialized(engine))
continue;
error->ring[i].valid = true;
- i915_record_ring_state(dev, error, engine, &error->ring[i]);
+ i915_record_ring_state(dev_priv, error, engine, &error->ring[i]);
+ engine_record_waiters(engine, &error->ring[i]);
request = i915_gem_find_active_request(engine);
if (request) {
struct i915_address_space *vm;
+ struct intel_ringbuffer *rb;
- vm = request->ctx && request->ctx->ppgtt ?
- &request->ctx->ppgtt->base :
- &ggtt->base;
+ vm = request->ctx->ppgtt ?
+ &request->ctx->ppgtt->base : &ggtt->base;
/* We need to copy these to an anonymous buffer
* as the simplest method to avoid being overwritten
@@ -1070,26 +1121,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
}
rcu_read_unlock();
}
- }
- if (i915.enable_execlists) {
- /* TODO: This is only a small fix to keep basic error
- * capture working, but we need to add more information
- * for it to be useful (e.g. dump the context being
- * executed).
- */
- if (request)
- rbuf = request->ctx->engine[engine->id].ringbuf;
- else
- rbuf = dev_priv->kernel_context->engine[engine->id].ringbuf;
- } else
- rbuf = engine->buffer;
-
- error->ring[i].cpu_ring_head = rbuf->head;
- error->ring[i].cpu_ring_tail = rbuf->tail;
+ error->simulated |=
+ request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
- error->ring[i].ringbuffer =
- i915_error_ggtt_object_create(dev_priv, rbuf->obj);
+ rb = request->ringbuf;
+ error->ring[i].cpu_ring_head = rb->head;
+ error->ring[i].cpu_ring_tail = rb->tail;
+ error->ring[i].ringbuffer =
+ i915_error_ggtt_object_create(dev_priv,
+ rb->obj);
+ }
error->ring[i].hws_page =
i915_error_ggtt_object_create(dev_priv,
@@ -1234,7 +1276,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
- struct drm_device *dev = dev_priv->dev;
+ struct drm_device *dev = &dev_priv->drm;
int i;
/* General organization
@@ -1301,15 +1343,14 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
error->eir = I915_READ(EIR);
error->pgtbl_er = I915_READ(PGTBL_ER);
- i915_get_extra_instdone(dev, error->extra_instdone);
+ i915_get_extra_instdone(dev_priv, error->extra_instdone);
}
-static void i915_error_capture_msg(struct drm_device *dev,
+static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error,
u32 engine_mask,
const char *error_msg)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
u32 ecode;
int ring_id = -1, len;
@@ -1317,7 +1358,7 @@ static void i915_error_capture_msg(struct drm_device *dev,
len = scnprintf(error->error_msg, sizeof(error->error_msg),
"GPU HANG: ecode %d:%d:0x%08x",
- INTEL_INFO(dev)->gen, ring_id, ecode);
+ INTEL_GEN(dev_priv), ring_id, ecode);
if (ring_id != -1 && error->ring[ring_id].pid != -1)
len += scnprintf(error->error_msg + len,
@@ -1352,14 +1393,17 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
* out a structure which becomes available in debugfs for user level tools
* to pick up.
*/
-void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
+void i915_capture_error_state(struct drm_i915_private *dev_priv,
+ u32 engine_mask,
const char *error_msg)
{
static bool warned;
- struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_error_state *error;
unsigned long flags;
+ if (READ_ONCE(dev_priv->gpu_error.first_error))
+ return;
+
/* Account for pipe specific data like PIPE*STAT */
error = kzalloc(sizeof(*error), GFP_ATOMIC);
if (!error) {
@@ -1372,23 +1416,25 @@ void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
i915_capture_gen_state(dev_priv, error);
i915_capture_reg_state(dev_priv, error);
i915_gem_capture_buffers(dev_priv, error);
- i915_gem_record_fences(dev, error);
- i915_gem_record_rings(dev, error);
+ i915_gem_record_fences(dev_priv, error);
+ i915_gem_record_rings(dev_priv, error);
do_gettimeofday(&error->time);
- error->overlay = intel_overlay_capture_error_state(dev);
- error->display = intel_display_capture_error_state(dev);
+ error->overlay = intel_overlay_capture_error_state(dev_priv);
+ error->display = intel_display_capture_error_state(dev_priv);
- i915_error_capture_msg(dev, error, engine_mask, error_msg);
+ i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
DRM_INFO("%s\n", error->error_msg);
- spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
- if (dev_priv->gpu_error.first_error == NULL) {
- dev_priv->gpu_error.first_error = error;
- error = NULL;
+ if (!error->simulated) {
+ spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+ if (!dev_priv->gpu_error.first_error) {
+ dev_priv->gpu_error.first_error = error;
+ error = NULL;
+ }
+ spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
}
- spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
if (error) {
i915_error_state_free(&error->ref);
@@ -1400,7 +1446,8 @@ void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
- DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", dev->primary->index);
+ DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
+ dev_priv->drm.primary->index);
warned = true;
}
}
@@ -1408,7 +1455,7 @@ void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
void i915_error_state_get(struct drm_device *dev,
struct i915_error_state_file_priv *error_priv)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_private *dev_priv = to_i915(dev);
spin_lock_irq(&dev_priv->gpu_error.lock);
error_priv->error = dev_priv->gpu_error.first_error;
@@ -1426,7 +1473,7 @@ void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
void i915_destroy_error_state(struct drm_device *dev)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_error_state *error;
spin_lock_irq(&dev_priv->gpu_error.lock);
@@ -1450,17 +1497,17 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
}
/* NB: please notice the memset */
-void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone)
+void i915_get_extra_instdone(struct drm_i915_private *dev_priv,
+ uint32_t *instdone)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
memset(instdone, 0, sizeof(*instdone) * I915_NUM_INSTDONE_REG);
- if (IS_GEN2(dev) || IS_GEN3(dev))
+ if (IS_GEN2(dev_priv) || IS_GEN3(dev_priv))
instdone[0] = I915_READ(GEN2_INSTDONE);
- else if (IS_GEN4(dev) || IS_GEN5(dev) || IS_GEN6(dev)) {
+ else if (IS_GEN4(dev_priv) || IS_GEN5(dev_priv) || IS_GEN6(dev_priv)) {
instdone[0] = I915_READ(RING_INSTDONE(RENDER_RING_BASE));
instdone[1] = I915_READ(GEN4_INSTDONE1);
- } else if (INTEL_INFO(dev)->gen >= 7) {
+ } else if (INTEL_GEN(dev_priv) >= 7) {
instdone[0] = I915_READ(RING_INSTDONE(RENDER_RING_BASE));
instdone[1] = I915_READ(GEN7_SC_INSTDONE);
instdone[2] = I915_READ(GEN7_SAMPLER_INSTDONE);