diff options
Diffstat (limited to 'drivers/gpu/drm/i915/selftests/intel_hangcheck.c')
-rw-r--r-- | drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 222 |
1 files changed, 143 insertions, 79 deletions
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index d1f91a533afa..df7898c8edcb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -33,6 +33,7 @@ struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; u32 *seqno; u32 *batch; }; @@ -45,9 +46,15 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915) memset(h, 0, sizeof(*h)); h->i915 = i915; + h->ctx = kernel_context(i915); + if (IS_ERR(h->ctx)) + return PTR_ERR(h->ctx); + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(h->hws)) - return PTR_ERR(h->hws); + if (IS_ERR(h->hws)) { + err = PTR_ERR(h->hws); + goto err_ctx; + } h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(h->obj)) { @@ -79,17 +86,19 @@ err_obj: i915_gem_object_put(h->obj); err_hws: i915_gem_object_put(h->hws); +err_ctx: + kernel_context_close(h->ctx); return err; } static u64 hws_address(const struct i915_vma *hws, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); } static int emit_recurse_batch(struct hang *h, - struct drm_i915_gem_request *rq) + struct i915_request *rq) { struct drm_i915_private *i915 = h->i915; struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; @@ -195,12 +204,10 @@ unpin_vma: return err; } -static struct drm_i915_gem_request * -hang_create_request(struct hang *h, - struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct i915_request * +hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; int err; if (i915_gem_object_is_active(h->obj)) { @@ -225,25 +232,76 @@ hang_create_request(struct hang *h, h->batch = vaddr; } - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, h->ctx); if (IS_ERR(rq)) return rq; err = emit_recurse_batch(h, rq); if (err) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); return ERR_PTR(err); } return rq; } -static u32 hws_seqno(const struct hang *h, - const struct drm_i915_gem_request *rq) +static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) { return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } +struct wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const void *symbol; +}; + +static void wedge_me(struct work_struct *work) +{ + struct wedge_me *w = container_of(work, typeof(*w), work.work); + + pr_err("%pS timed out, cancelling all further testing.\n", + w->symbol); + i915_gem_set_wedged(w->i915); +} + +static void __init_wedge(struct wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const void *symbol) +{ + w->i915 = i915; + w->symbol = symbol; + + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +static void __fini_wedge(struct wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} + +#define wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ + (W)->i915; \ + __fini_wedge((W))) + +static noinline int +flush_test(struct drm_i915_private *i915, unsigned int flags) +{ + struct wedge_me w; + + cond_resched(); + + wedge_on_timeout(&w, i915, HZ) + i915_gem_wait_for_idle(i915, flags); + + return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; +} + static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; @@ -255,10 +313,12 @@ static void hang_fini(struct hang *h) i915_gem_object_unpin_map(h->hws); i915_gem_object_put(h->hws); - i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); + kernel_context_close(h->ctx); + + flush_test(h->i915, I915_WAIT_LOCKED); } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +static bool wait_for_hang(struct hang *h, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), rq->fence.seqno), @@ -271,7 +331,7 @@ static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -290,7 +350,7 @@ static int igt_hang_sanitycheck(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - rq = hang_create_request(&h, engine, i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); pr_err("Failed to create request for %s, err=%d\n", @@ -298,17 +358,17 @@ static int igt_hang_sanitycheck(void *arg) goto fini; } - i915_gem_request_get(rq); + i915_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - __i915_add_request(rq, true); + __i915_request_add(rq, true); - timeout = i915_wait_request(rq, + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(rq); + i915_request_put(rq); if (timeout < 0) { err = timeout; @@ -424,19 +484,18 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); - rq = hang_create_request(&h, engine, - i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); mutex_unlock(&i915->drm.struct_mutex); break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -447,12 +506,12 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -487,7 +546,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) if (err) break; - cond_resched(); + err = flush_test(i915, 0); + if (err) + break; } if (i915_terminally_wedged(&i915->gpu_error)) @@ -515,7 +576,7 @@ static int igt_reset_active_engine(void *arg) static int active_engine(void *data) { struct intel_engine_cs *engine = data; - struct drm_i915_gem_request *rq[2] = {}; + struct i915_request *rq[2] = {}; struct i915_gem_context *ctx[2]; struct drm_file *file; unsigned long count = 0; @@ -544,29 +605,29 @@ static int active_engine(void *data) while (!kthread_should_stop()) { unsigned int idx = count++ & 1; - struct drm_i915_gem_request *old = rq[idx]; - struct drm_i915_gem_request *new; + struct i915_request *old = rq[idx]; + struct i915_request *new; mutex_lock(&engine->i915->drm.struct_mutex); - new = i915_gem_request_alloc(engine, ctx[idx]); + new = i915_request_alloc(engine, ctx[idx]); if (IS_ERR(new)) { mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } - rq[idx] = i915_gem_request_get(new); - i915_add_request(new); + rq[idx] = i915_request_get(new); + i915_request_add(new); mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(old); + i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(old); } } for (count = 0; count < ARRAY_SIZE(rq); count++) - i915_gem_request_put(rq[count]); + i915_request_put(rq[count]); err_file: mock_file_free(engine->i915, file); @@ -630,19 +691,18 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); - rq = hang_create_request(&h, engine, - i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); mutex_unlock(&i915->drm.struct_mutex); break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -653,12 +713,12 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -726,7 +786,9 @@ unwind: if (err) break; - cond_resched(); + err = flush_test(i915, 0); + if (err) + break; } if (i915_terminally_wedged(&i915->gpu_error)) @@ -751,7 +813,7 @@ static int igt_reset_active_engine_others(void *arg) return __igt_reset_engine_others(arg, true); } -static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +static u32 fake_hangcheck(struct i915_request *rq) { u32 reset_count; @@ -769,7 +831,7 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; @@ -787,14 +849,14 @@ static int igt_wait_reset(void *arg) if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + rq = hang_create_request(&h, i915->engine[RCS]); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -812,9 +874,9 @@ static int igt_wait_reset(void *arg) reset_count = fake_hangcheck(rq); - timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { - pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + pr_err("i915_request_wait failed on a stuck request: err=%ld\n", timeout); err = timeout; goto out_rq; @@ -828,7 +890,7 @@ static int igt_wait_reset(void *arg) } out_rq: - i915_gem_request_put(rq); + i915_request_put(rq); fini: hang_fini(&h); unlock: @@ -859,37 +921,35 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; if (!intel_engine_can_store_dword(engine)) continue; - prev = hang_create_request(&h, engine, i915->kernel_context); + prev = hang_create_request(&h, engine); if (IS_ERR(prev)) { err = PTR_ERR(prev); goto fini; } - i915_gem_request_get(prev); - __i915_add_request(prev, true); + i915_request_get(prev); + __i915_request_add(prev, true); count = 0; do { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; - rq = hang_create_request(&h, - engine, - i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -899,8 +959,8 @@ static int igt_reset_queue(void *arg) intel_engine_dump(prev->engine, &p, "%s\n", prev->engine->name); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -919,8 +979,8 @@ static int igt_reset_queue(void *arg) if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", prev->fence.error); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } @@ -928,21 +988,21 @@ static int igt_reset_queue(void *arg) if (rq->fence.error) { pr_err("Fence error status not zero [%d] after unrelated reset\n", rq->fence.error); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } - i915_gem_request_put(prev); + i915_request_put(prev); prev = rq; count++; } while (time_before(jiffies, end_time)); @@ -951,7 +1011,11 @@ static int igt_reset_queue(void *arg) *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - i915_gem_request_put(prev); + i915_request_put(prev); + + err = flush_test(i915, I915_WAIT_LOCKED); + if (err) + break; } fini: @@ -971,7 +1035,7 @@ static int igt_handle_error(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine = i915->engine[RCS]; struct hang h; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_gpu_state *error; int err; @@ -989,14 +1053,14 @@ static int igt_handle_error(void *arg) if (err) goto err_unlock; - rq = hang_create_request(&h, engine, i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -1033,7 +1097,7 @@ static int igt_handle_error(void *arg) } err_request: - i915_gem_request_put(rq); + i915_request_put(rq); err_fini: hang_fini(&h); err_unlock: |