Diffstat (limited to 'drivers/gpu/drm/i915/gt/selftest_hangcheck.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 136
1 file changed, 67 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 15d63435ec4d..4f252f704975 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -6,7 +6,9 @@
#include <linux/kthread.h>
#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
+#include "i915_gem_evict.h"
#include "intel_gt.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
@@ -71,7 +73,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
vaddr = i915_gem_object_pin_map_unlocked(h->obj,
- i915_coherent_map_type(gt->i915, h->obj, false));
+ intel_gt_coherent_map_type(gt, h->obj, false));
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto err_unpin_hws;
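The map-type decision moves from a device-wide helper to a GT-scoped one, so the test pins its CPU mapping according to the GT under test. A minimal sketch of the decision such a helper makes, assuming the usual LMEM/LLC rules (the real implementation lives in intel_gt.c and may carry extra platform workarounds):

static enum i915_map_type
sketch_coherent_map_type(struct intel_gt *gt,
			 struct drm_i915_gem_object *obj,
			 bool always_coherent)
{
	/* Device-local memory is mapped write-combined on the CPU side. */
	if (i915_gem_object_is_lmem(obj))
		return I915_MAP_WC;

	/* With a shared LLC (or when asked to), cached mappings are coherent. */
	if (HAS_LLC(gt->i915) || always_coherent)
		return I915_MAP_WB;

	/* Otherwise avoid stale cachelines with a write-combined mapping. */
	return I915_MAP_WC;
}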
@@ -94,23 +96,8 @@ err_ctx:
static u64 hws_address(const struct i915_vma *hws,
const struct i915_request *rq)
{
- return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
-}
-
-static int move_to_active(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags)
-{
- int err;
-
- i915_vma_lock(vma);
- err = i915_request_await_object(rq, vma->obj,
- flags & EXEC_OBJECT_WRITE);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, flags);
- i915_vma_unlock(vma);
-
- return err;
+ return i915_vma_offset(hws) +
+ offset_in_page(sizeof(u32) * rq->fence.context);
}
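The file-local move_to_active() helper is dropped in favour of the shared selftest helper igt_vma_move_to_active_unlocked(), and hws_address() now reads the GPU address through i915_vma_offset() instead of peeking at the drm_mm node. A sketch of what such a wrapper plausibly does, assuming i915_vma_move_to_active() now establishes the request/object dependency itself (so the separate i915_request_await_object() call is no longer needed):

static inline int
sketch_move_to_active_unlocked(struct i915_vma *vma,
			       struct i915_request *rq,
			       unsigned int flags)
{
	int err;

	/* Take the vma's dma-resv lock for the duration of the update. */
	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}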
static struct i915_request *
@@ -132,7 +119,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
return ERR_CAST(obj);
}
- vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false));
+ vaddr = i915_gem_object_pin_map_unlocked(obj, intel_gt_coherent_map_type(gt, obj, false));
if (IS_ERR(vaddr)) {
i915_gem_object_put(obj);
i915_vm_put(vm);
@@ -173,11 +160,11 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
goto unpin_hws;
}
- err = move_to_active(vma, rq, 0);
+ err = igt_vma_move_to_active_unlocked(vma, rq, 0);
if (err)
goto cancel_rq;
- err = move_to_active(hws, rq, 0);
+ err = igt_vma_move_to_active_unlocked(hws, rq, 0);
if (err)
goto cancel_rq;
@@ -194,8 +181,8 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
- *batch++ = lower_32_bits(vma->node.start);
- *batch++ = upper_32_bits(vma->node.start);
+ *batch++ = lower_32_bits(i915_vma_offset(vma));
+ *batch++ = upper_32_bits(i915_vma_offset(vma));
} else if (GRAPHICS_VER(gt->i915) >= 6) {
*batch++ = MI_STORE_DWORD_IMM_GEN4;
*batch++ = 0;
@@ -208,7 +195,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
- *batch++ = lower_32_bits(vma->node.start);
+ *batch++ = lower_32_bits(i915_vma_offset(vma));
} else if (GRAPHICS_VER(gt->i915) >= 4) {
*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*batch++ = 0;
@@ -221,7 +208,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
- *batch++ = lower_32_bits(vma->node.start);
+ *batch++ = lower_32_bits(i915_vma_offset(vma));
} else {
*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
*batch++ = lower_32_bits(hws_address(hws, rq));
@@ -233,7 +220,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
- *batch++ = lower_32_bits(vma->node.start);
+ *batch++ = lower_32_bits(i915_vma_offset(vma));
}
*batch++ = MI_BATCH_BUFFER_END; /* not reached */
intel_gt_chipset_flush(engine->gt);
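Every batch that jumps back to itself now obtains its GPU address via i915_vma_offset() rather than dereferencing vma->node.start directly. Functionally the two are the same today; funnelling all readers through an accessor lets the driver add bind assertions or decouple the GPU virtual address from the drm_mm node later without touching call sites. A minimal sketch, assuming the accessor is currently a thin wrapper:

static inline u64 sketch_vma_offset(const struct i915_vma *vma)
{
	/* Assumed: the GPU VA is still exactly the drm_mm node offset. */
	return vma->node.start;
}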
@@ -248,7 +235,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
if (GRAPHICS_VER(gt->i915) <= 5)
flags |= I915_DISPATCH_SECURE;
- err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
+ err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), PAGE_SIZE, flags);
cancel_rq:
if (err) {
@@ -332,7 +319,7 @@ static int igt_hang_sanitycheck(void *arg)
i915_request_add(rq);
timeout = 0;
- intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
+ intel_wedge_on_timeout(&w, gt, HZ / 5 /* 200ms */)
timeout = i915_request_wait(rq, 0,
MAX_SCHEDULE_TIMEOUT);
if (intel_gt_is_wedged(gt))
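The grace period before the sanity-check wait forces a wedge doubles from 100ms to 200ms, giving slower parts more headroom before the selftest gives up. For context, intel_wedge_on_timeout() is a scoped watchdog around its body statement; a hedged sketch of how such a macro is typically built, assuming the init/fini helpers arm and cancel a timer that wedges the GT on expiry:

#define sketch_wedge_on_timeout(W, GT, TIMEOUT)			\
	for (__intel_init_wedge((W), (GT), (TIMEOUT), __func__);	\
	     (W)->gt;							\
	     __intel_fini_wedge((W)))

The body (the i915_request_wait() above) runs exactly once; if it is still blocked when the timer fires, the GT is wedged so the wait is guaranteed to terminate.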
@@ -561,7 +548,7 @@ static int igt_reset_fail_engine(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- /* Check that we can recover from engine-reset failues */
+ /* Check that we can recover from engine-reset failures */
if (!intel_has_reset_engine(gt))
return 0;
@@ -864,10 +851,13 @@ static int igt_reset_active_engine(void *arg)
}
struct active_engine {
- struct task_struct *task;
+ struct kthread_worker *worker;
+ struct kthread_work work;
struct intel_engine_cs *engine;
unsigned long resets;
unsigned int flags;
+ bool stop;
+ int result;
};
#define TEST_ACTIVE BIT(0)
@@ -898,10 +888,10 @@ static int active_request_put(struct i915_request *rq)
return err;
}
-static int active_engine(void *data)
+static void active_engine(struct kthread_work *work)
{
I915_RND_STATE(prng);
- struct active_engine *arg = data;
+ struct active_engine *arg = container_of(work, typeof(*arg), work);
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq[8] = {};
struct intel_context *ce[ARRAY_SIZE(rq)];
@@ -911,16 +901,17 @@ static int active_engine(void *data)
for (count = 0; count < ARRAY_SIZE(ce); count++) {
ce[count] = intel_context_create(engine);
if (IS_ERR(ce[count])) {
- err = PTR_ERR(ce[count]);
- pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
- while (--count)
+ arg->result = PTR_ERR(ce[count]);
+ pr_err("[%s] Create context #%ld failed: %d!\n",
+ engine->name, count, arg->result);
+ while (count--)
intel_context_put(ce[count]);
- return err;
+ return;
}
}
count = 0;
- while (!kthread_should_stop()) {
+ while (!READ_ONCE(arg->stop)) {
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
struct i915_request *old = rq[idx];
struct i915_request *new;
@@ -965,7 +956,7 @@ static int active_engine(void *data)
intel_context_put(ce[count]);
}
- return err;
+ arg->result = err;
}
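A kthread_work callback returns void and receives the work item rather than an opaque pointer, so the conversion recovers its context with container_of() and reports status through the context struct instead of a thread exit code; the kthread_should_stop() check likewise becomes a plain flag that the spawner sets. A self-contained sketch of the callback side of this pattern (names hypothetical):

#include <linux/kthread.h>

struct sketch_ctx {
	struct kthread_work work;	/* embedded so container_of() works */
	bool stop;
	int result;
};

static void sketch_fn(struct kthread_work *work)
{
	struct sketch_ctx *ctx = container_of(work, typeof(*ctx), work);
	int err = 0;

	while (!READ_ONCE(ctx->stop)) {
		/* ... submit one round of test load here ... */
		cond_resched();
	}

	/* No return value from a work item: report through the context. */
	ctx->result = err;
}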
static int __igt_reset_engines(struct intel_gt *gt,
@@ -974,6 +965,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
{
struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine, *other;
+ struct active_engine *threads;
enum intel_engine_id id, tmp;
struct hang h;
int err = 0;
@@ -994,8 +986,11 @@ static int __igt_reset_engines(struct intel_gt *gt,
h.ctx->sched.priority = 1024;
}
+ threads = kmalloc_array(I915_NUM_ENGINES, sizeof(*threads), GFP_KERNEL);
+ if (!threads)
+ return -ENOMEM;
+
for_each_engine(engine, gt, id) {
- struct active_engine threads[I915_NUM_ENGINES] = {};
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
bool using_guc = intel_engine_uses_guc(engine);
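With the extra worker bookkeeping, an I915_NUM_ENGINES-sized array of struct active_engine is too heavy for the kernel stack, so it becomes a single heap allocation reused across the for_each_engine() loop and freed once at the end (see the kfree() below). kcalloc() could fold the allocation and the first zeroing together, but the explicit memset() in the following hunk stays necessary anyway because the array is re-zeroed for every engine iteration:

/* assumed-equivalent allocation, zeroed up front */
threads = kcalloc(I915_NUM_ENGINES, sizeof(*threads), GFP_KERNEL);
if (!threads)
	return -ENOMEM;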
@@ -1014,9 +1009,9 @@ static int __igt_reset_engines(struct intel_gt *gt,
break;
}
- memset(threads, 0, sizeof(threads));
+ memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
for_each_engine(other, gt, tmp) {
- struct task_struct *tsk;
+ struct kthread_worker *worker;
threads[tmp].resets =
i915_reset_engine_count(global, other);
@@ -1030,19 +1025,21 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
- tsk = kthread_run(active_engine, &threads[tmp],
- "igt/%s", other->name);
- if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
+ worker = kthread_run_worker(0, "igt/%s",
+ other->name);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
+ pr_err("[%s] Worker create failed: %d!\n",
+ engine->name, err);
goto unwind;
}
- threads[tmp].task = tsk;
- get_task_struct(tsk);
- }
+ threads[tmp].worker = worker;
- yield(); /* start all threads before we begin */
+ kthread_init_work(&threads[tmp].work, active_engine);
+ kthread_queue_work(threads[tmp].worker,
+ &threads[tmp].work);
+ }
st_engine_heartbeat_disable_no_pm(engine);
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
@@ -1191,17 +1188,20 @@ unwind:
for_each_engine(other, gt, tmp) {
int ret;
- if (!threads[tmp].task)
+ if (!threads[tmp].worker)
continue;
- ret = kthread_stop(threads[tmp].task);
+ WRITE_ONCE(threads[tmp].stop, true);
+ kthread_flush_work(&threads[tmp].work);
+ ret = READ_ONCE(threads[tmp].result);
if (ret) {
pr_err("kthread for other engine %s failed, err=%d\n",
other->name, ret);
if (!err)
err = ret;
}
- put_task_struct(threads[tmp].task);
+
+ kthread_destroy_worker(threads[tmp].worker);
/* GuC based resets are not logged per engine */
if (!using_guc) {
@@ -1234,6 +1234,7 @@ unwind:
break;
}
}
+ kfree(threads);
if (intel_gt_is_wedged(gt))
err = -EIO;
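Taken together, the spawn and unwind hunks above replace kthread_run()/kthread_stop() with the kthread_worker API: one worker thread per engine, a queued work item as the load generator, and a teardown that splits what kthread_stop() used to do into stop-flag, flush, result collection, and worker destruction. A condensed sketch of the driver side, reusing the hypothetical sketch_ctx/sketch_fn from above (kthread_run_worker() is the current name for this creator; older kernels spell it kthread_create_worker()):

static int sketch_drive(struct sketch_ctx *ctx, const char *name)
{
	struct kthread_worker *worker;
	int ret;

	/* Spawn: a dedicated worker thread, then queue the load. */
	worker = kthread_run_worker(0, "igt/%s", name);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	kthread_init_work(&ctx->work, sketch_fn);
	kthread_queue_work(worker, &ctx->work);

	/* ... run the reset test while the work item generates load ... */

	/* Teardown: what kthread_stop() used to do, in three steps. */
	WRITE_ONCE(ctx->stop, true);	/* ask the callback to wind down */
	kthread_flush_work(&ctx->work);	/* wait until it has finished */
	ret = READ_ONCE(ctx->result);	/* collect its reported status */
	kthread_destroy_worker(worker);

	return ret;
}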
@@ -1295,13 +1296,15 @@ static int igt_reset_wait(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->engine[RCS0];
+ struct intel_engine_cs *engine;
struct i915_request *rq;
unsigned int reset_count;
struct hang h;
long timeout;
int err;
+ engine = intel_selftest_find_any_engine(gt);
+
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
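igt_reset_wait() (and the tests below) no longer assume a render engine at RCS0, which does not exist on render-less or media-only GTs; instead any available engine is discovered at runtime. A sketch of what such a finder plausibly does, assuming it simply returns the first populated engine:

static struct intel_engine_cs *sketch_find_any_engine(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		return engine;	/* first engine present on this GT */

	return NULL;		/* caller treats this as "skip the test" */
}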
@@ -1382,7 +1385,7 @@ static int evict_vma(void *data)
complete(&arg->completion);
mutex_lock(&vm->mutex);
- err = i915_gem_evict_for_node(vm, &evict, 0);
+ err = i915_gem_evict_for_node(vm, NULL, &evict, 0);
mutex_unlock(&vm->mutex);
return err;
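i915_gem_evict_for_node() gained a struct i915_gem_ww_ctx * parameter so eviction can lock objects as part of a ww-mutex transaction; this caller runs under vm->mutex with no transaction of its own, hence the NULL. The call shape with the new argument annotated (an illustration of the updated signature, not new behaviour):

mutex_lock(&vm->mutex);
err = i915_gem_evict_for_node(vm, /* ww ctx */ NULL, &evict, /* flags */ 0);
mutex_unlock(&vm->mutex);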
@@ -1425,7 +1428,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
int (*fn)(void *),
unsigned int flags)
{
- struct intel_engine_cs *engine = gt->engine[RCS0];
+ struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
struct task_struct *tsk = NULL;
struct i915_request *rq;
@@ -1437,6 +1440,8 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
return 0;
+ engine = intel_selftest_find_any_engine(gt);
+
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
@@ -1499,18 +1504,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
}
}
- i915_vma_lock(arg.vma);
- err = i915_request_await_object(rq, arg.vma->obj,
- flags & EXEC_OBJECT_WRITE);
- if (err == 0) {
- err = i915_vma_move_to_active(arg.vma, rq, flags);
- if (err)
- pr_err("[%s] Move to active failed: %d!\n", engine->name, err);
- } else {
- pr_err("[%s] Request await failed: %d!\n", engine->name, err);
- }
-
- i915_vma_unlock(arg.vma);
+ err = igt_vma_move_to_active_unlocked(arg.vma, rq, flags);
+ if (err)
+ pr_err("[%s] Move to active failed: %d!\n", engine->name, err);
if (flags & EXEC_OBJECT_NEEDS_FENCE)
i915_vma_unpin_fence(arg.vma);
@@ -1812,12 +1808,14 @@ static int igt_handle_error(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->engine[RCS0];
+ struct intel_engine_cs *engine;
struct hang h;
struct i915_request *rq;
struct i915_gpu_coredump *error;
int err;
+ engine = intel_selftest_find_any_engine(gt);
+
/* Check that we can issue a global GPU and engine reset */
if (!intel_has_reset_engine(gt))