diff options
author | Tvrtko Ursulin <tvrtko.ursulin@intel.com> | 2022-10-20 14:08:41 +0100 |
---|---|---|
committer | Tvrtko Ursulin <tvrtko.ursulin@intel.com> | 2022-10-20 22:06:19 +0100 |
commit | 6407cf533217e09dfd895e64984c3f1ee3802373 (patch) | |
tree | 8d9a5c369f06e37497d144b671c035800a6096ff /drivers/gpu/drm/i915/gt | |
parent | 03eababbf383e6340ef900c91315c97bd9cdd0b7 (diff) |
drm/i915/selftests: Stop using kthread_stop()
Since a7c01fa93aeb ("signal: break out of wait loops on kthread_stop()")
kthread_stop() started asserting a pending signal which wreaks havoc with
a few of our selftests. Mainly because they are not fully expecting to
handle signals, but also cutting the intended test runtimes short due
signal_pending() now returning true (via __igt_timeout), which therefore
breaks both the patterns of:
kthread_run()
..sleep for igt_timeout_ms to allow test to exercise stuff..
kthread_stop()
And check for errors recorded in the thread.
And also:
Main thread | Test thread
---------------+------------------------------
kthread_run() |
kthread_stop() | do stuff until __igt_timeout
| -- exits early due signal --
Where this kthread_stop() was assume would have a "join" semantics, which
it would have had if not the new signal assertion issue.
To recap, threads are now likely to catch a previously impossible
ERESTARTSYS or EINTR, marking the test as failed, or have a pointlessly
short run time.
To work around this start using kthread_work(er) API which provides
an explicit way of waiting for threads to exit. And for cases where
parent controls the test duration we add explicit signaling which threads
will now use instead of relying on kthread_should_stop().
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221020130841.3845791-1-tvrtko.ursulin@linux.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r-- | drivers/gpu/drm/i915/gt/selftest_execlists.c | 48 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 51 |
2 files changed, 53 insertions, 46 deletions
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 56b7d5b5fea0..2c7c053a8808 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3473,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd) struct preempt_smoke { struct intel_gt *gt; + struct kthread_work work; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; unsigned int ncontext; struct rnd_state prng; unsigned long count; + int result; }; static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) @@ -3538,34 +3540,31 @@ unpin: return err; } -static int smoke_crescendo_thread(void *arg) +static void smoke_crescendo_work(struct kthread_work *work) { - struct preempt_smoke *smoke = arg; + struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work); IGT_TIMEOUT(end_time); unsigned long count; count = 0; do { struct i915_gem_context *ctx = smoke_context(smoke); - int err; - err = smoke_submit(smoke, - ctx, count % I915_PRIORITY_MAX, - smoke->batch); - if (err) - return err; + smoke->result = smoke_submit(smoke, ctx, + count % I915_PRIORITY_MAX, + smoke->batch); count++; - } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); + } while (!smoke->result && count < smoke->ncontext && + !__igt_timeout(end_time, NULL)); smoke->count = count; - return 0; } static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) #define BATCH BIT(0) { - struct task_struct *tsk[I915_NUM_ENGINES] = {}; + struct kthread_worker *worker[I915_NUM_ENGINES] = {}; struct preempt_smoke *arg; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3576,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) if (!arg) return -ENOMEM; + memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg)); + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -3583,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - tsk[id] = kthread_run(smoke_crescendo_thread, arg, - "igt/smoke:%d", id); - if (IS_ERR(tsk[id])) { - err = PTR_ERR(tsk[id]); + worker[id] = kthread_create_worker(0, "igt/smoke:%d", id); + if (IS_ERR(worker[id])) { + err = PTR_ERR(worker[id]); break; } - get_task_struct(tsk[id]); - } - yield(); /* start all threads before we kthread_stop() */ + kthread_init_work(&arg[id].work, smoke_crescendo_work); + kthread_queue_work(worker[id], &arg[id].work); + } count = 0; for_each_engine(engine, smoke->gt, id) { - int status; - - if (IS_ERR_OR_NULL(tsk[id])) + if (IS_ERR_OR_NULL(worker[id])) continue; - status = kthread_stop(tsk[id]); - if (status && !err) - err = status; + kthread_flush_work(&arg[id].work); + if (arg[id].result && !err) + err = arg[id].result; count += arg[id].count; - put_task_struct(tsk[id]); + kthread_destroy_worker(worker[id]); } pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7f3bb1d34dfb..71263058a7b0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg) } struct active_engine { - struct task_struct *task; + struct kthread_worker *worker; + struct kthread_work work; struct intel_engine_cs *engine; unsigned long resets; unsigned int flags; + bool stop; + int result; }; #define TEST_ACTIVE BIT(0) @@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq) return err; } -static int active_engine(void *data) +static void active_engine(struct kthread_work *work) { I915_RND_STATE(prng); - struct active_engine *arg = data; + struct active_engine *arg = container_of(work, typeof(*arg), work); struct intel_engine_cs *engine = arg->engine; struct i915_request *rq[8] = {}; struct intel_context *ce[ARRAY_SIZE(rq)]; @@ -913,16 +916,17 @@ static int active_engine(void *data) for (count = 0; count < ARRAY_SIZE(ce); count++) { ce[count] = intel_context_create(engine); if (IS_ERR(ce[count])) { - err = PTR_ERR(ce[count]); - pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); + arg->result = PTR_ERR(ce[count]); + pr_err("[%s] Create context #%ld failed: %d!\n", + engine->name, count, arg->result); while (--count) intel_context_put(ce[count]); - return err; + return; } } count = 0; - while (!kthread_should_stop()) { + while (!READ_ONCE(arg->stop)) { unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; @@ -967,7 +971,7 @@ static int active_engine(void *data) intel_context_put(ce[count]); } - return err; + arg->result = err; } static int __igt_reset_engines(struct intel_gt *gt, @@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt, memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES); for_each_engine(other, gt, tmp) { - struct task_struct *tsk; + struct kthread_worker *worker; threads[tmp].resets = i915_reset_engine_count(global, other); @@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].engine = other; threads[tmp].flags = flags; - tsk = kthread_run(active_engine, &threads[tmp], - "igt/%s", other->name); - if (IS_ERR(tsk)) { - err = PTR_ERR(tsk); - pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); + worker = kthread_create_worker(0, "igt/%s", + other->name); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + pr_err("[%s] Worker create failed: %d!\n", + engine->name, err); goto unwind; } - threads[tmp].task = tsk; - get_task_struct(tsk); - } + threads[tmp].worker = worker; - yield(); /* start all threads before we begin */ + kthread_init_work(&threads[tmp].work, active_engine); + kthread_queue_work(threads[tmp].worker, + &threads[tmp].work); + } st_engine_heartbeat_disable_no_pm(engine); GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, @@ -1197,17 +1203,20 @@ unwind: for_each_engine(other, gt, tmp) { int ret; - if (!threads[tmp].task) + if (!threads[tmp].worker) continue; - ret = kthread_stop(threads[tmp].task); + WRITE_ONCE(threads[tmp].stop, true); + kthread_flush_work(&threads[tmp].work); + ret = READ_ONCE(threads[tmp].result); if (ret) { pr_err("kthread for other engine %s failed, err=%d\n", other->name, ret); if (!err) err = ret; } - put_task_struct(threads[tmp].task); + + kthread_destroy_worker(threads[tmp].worker); /* GuC based resets are not logged per engine */ if (!using_guc) { |