summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/selftest_migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/selftest_migrate.c')
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c483
1 files changed, 426 insertions, 57 deletions
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index fa4293d2944f..fdf0e9858607 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -5,6 +5,10 @@
#include <linux/sort.h>
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
+
+#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"
static const unsigned int sizes[] = {
@@ -130,6 +134,124 @@ err_free_src:
return err;
}
+static int intel_context_copy_ccs(struct intel_context *ce,
+ const struct i915_deps *deps,
+ struct scatterlist *sg,
+ unsigned int pat_index,
+ bool write_to_ccs,
+ struct i915_request **out)
+{
+ u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
+ u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
+ struct sgt_dma it = sg_sgt(sg);
+ struct i915_request *rq;
+ u32 offset;
+ int err;
+
+ GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
+ *out = NULL;
+
+ GEM_BUG_ON(ce->ring->size < SZ_64K);
+
+ offset = 0;
+ if (HAS_64K_PAGES(ce->engine->i915))
+ offset = CHUNK_SZ;
+
+ do {
+ int len;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+
+ if (deps) {
+ err = i915_request_await_deps(rq, deps);
+ if (err)
+ goto out_rq;
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto out_rq;
+ }
+
+ deps = NULL;
+ }
+
+ /* The PTE updates + clear must not be interrupted. */
+ err = emit_no_arbitration(rq);
+ if (err)
+ goto out_rq;
+
+ len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
+ if (len <= 0) {
+ err = len;
+ goto out_rq;
+ }
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ err = emit_copy_ccs(rq, offset, dst_access,
+ offset, src_access, len);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+
+ /* Arbitration is re-enabled between requests. */
+out_rq:
+ if (*out)
+ i915_request_put(*out);
+ *out = i915_request_get(rq);
+ i915_request_add(rq);
+ if (err || !it.sg || !sg_dma_len(it.sg))
+ break;
+
+ cond_resched();
+ } while (1);
+
+out_ce:
+ return err;
+}
+
+static int
+intel_migrate_ccs_copy(struct intel_migrate *m,
+ struct i915_gem_ww_ctx *ww,
+ const struct i915_deps *deps,
+ struct scatterlist *sg,
+ unsigned int pat_index,
+ bool write_to_ccs,
+ struct i915_request **out)
+{
+ struct intel_context *ce;
+ int err;
+
+ *out = NULL;
+ if (!m->context)
+ return -ENODEV;
+
+ ce = intel_migrate_create_context(m);
+ if (IS_ERR(ce))
+ ce = intel_context_get(m->context);
+ GEM_BUG_ON(IS_ERR(ce));
+
+ err = intel_context_pin_ww(ce, ww);
+ if (err)
+ goto out;
+
+ err = intel_context_copy_ccs(ce, deps, sg, pat_index,
+ write_to_ccs, out);
+
+ intel_context_unpin(ce);
+out:
+ intel_context_put(ce);
+ return err;
+}
+
static int clear(struct intel_migrate *migrate,
int (*fn)(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
@@ -140,9 +262,10 @@ static int clear(struct intel_migrate *migrate,
{
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *obj;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
struct i915_gem_ww_ctx ww;
- u32 *vaddr;
+ u32 *vaddr, val = 0;
+ bool ccs_cap = false;
int err = 0;
int i;
@@ -150,7 +273,15 @@ static int clear(struct intel_migrate *migrate,
if (IS_ERR(obj))
return 0;
+ /* Consider the rounded up memory too */
+ sz = obj->base.size;
+
+ if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
+ ccs_cap = true;
+
for_i915_gem_ww(&ww, err, true) {
+ int ccs_bytes, ccs_bytes_per_chunk;
+
err = i915_gem_object_lock(obj, &ww);
if (err)
continue;
@@ -165,44 +296,114 @@ static int clear(struct intel_migrate *migrate,
vaddr[i] = ~i;
i915_gem_object_flush_map(obj);
- err = fn(migrate, &ww, obj, sz, &rq);
- if (!err)
- continue;
+ if (ccs_cap && !val) {
+ /* Write the obj data into ccs surface */
+ err = intel_migrate_ccs_copy(migrate, &ww, NULL,
+ obj->mm.pages->sgl,
+ obj->pat_index,
+ true, &rq);
+ if (rq && !err) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ pr_err("%ps timed out, size: %u\n",
+ fn, sz);
+ err = -ETIME;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
+ if (err)
+ continue;
+ }
- if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
- pr_err("%ps failed, size: %u\n", fn, sz);
- if (rq) {
- i915_request_wait(rq, 0, HZ);
+ err = fn(migrate, &ww, obj, val, &rq);
+ if (rq && !err) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ pr_err("%ps timed out, size: %u\n", fn, sz);
+ err = -ETIME;
+ }
i915_request_put(rq);
+ rq = NULL;
}
- i915_gem_object_unpin_map(obj);
- }
- if (err)
- goto err_out;
+ if (err)
+ continue;
- if (rq) {
- if (i915_request_wait(rq, 0, HZ) < 0) {
- pr_err("%ps timed out, size: %u\n", fn, sz);
- err = -ETIME;
+ i915_gem_object_flush_map(obj);
+
+ /* Verify the set/clear of the obj mem */
+ for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
+ int x = i * 1024 +
+ i915_prandom_u32_max_state(1024, prng);
+
+ if (vaddr[x] != val) {
+ pr_err("%ps failed, (%u != %u), offset: %zu\n",
+ fn, vaddr[x], val, x * sizeof(u32));
+ igt_hexdump(vaddr + i * 1024, 4096);
+ err = -EINVAL;
+ }
}
- i915_request_put(rq);
- }
+ if (err)
+ continue;
- for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
- int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
+ if (ccs_cap && !val) {
+ for (i = 0; i < sz / sizeof(u32); i++)
+ vaddr[i] = ~i;
+ i915_gem_object_flush_map(obj);
+
+ err = intel_migrate_ccs_copy(migrate, &ww, NULL,
+ obj->mm.pages->sgl,
+ obj->pat_index,
+ false, &rq);
+ if (rq && !err) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ pr_err("%ps timed out, size: %u\n",
+ fn, sz);
+ err = -ETIME;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
+ if (err)
+ continue;
+
+ ccs_bytes = GET_CCS_BYTES(i915, sz);
+ ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
+ i915_gem_object_flush_map(obj);
+
+ for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
+ int offset = ((i * PAGE_SIZE) /
+ ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
+ int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
+ int x = i915_prandom_u32_max_state(min_t(int, 1024,
+ ccs_bytes_left), prng);
+
+ if (vaddr[offset + x]) {
+ pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
+ fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
+ igt_hexdump(vaddr + offset,
+ min_t(int, 4096,
+ ccs_bytes_left * sizeof(u32)));
+ err = -EINVAL;
+ }
+ }
+
+ if (err)
+ continue;
+ }
+ i915_gem_object_unpin_map(obj);
+ }
- if (vaddr[x] != sz) {
- pr_err("%ps failed, size: %u, offset: %zu\n",
- fn, sz, x * sizeof(u32));
- igt_hexdump(vaddr + i * 1024, 4096);
- err = -EINVAL;
+ if (err) {
+ if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
+ pr_err("%ps failed, size: %u\n", fn, sz);
+ if (rq && err != -EINVAL) {
+ i915_request_wait(rq, 0, HZ);
+ i915_request_put(rq);
}
+
+ i915_gem_object_unpin_map(obj);
}
- i915_gem_object_unpin_map(obj);
-err_out:
i915_gem_object_put(obj);
-
return err;
}
@@ -213,9 +414,9 @@ static int __migrate_copy(struct intel_migrate *migrate,
struct i915_request **out)
{
return intel_migrate_copy(migrate, ww, NULL,
- src->mm.pages->sgl, src->cache_level,
+ src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
- dst->mm.pages->sgl, dst->cache_level,
+ dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
@@ -227,9 +428,9 @@ static int __global_copy(struct intel_migrate *migrate,
struct i915_request **out)
{
return intel_context_migrate_copy(migrate->context, NULL,
- src->mm.pages->sgl, src->cache_level,
+ src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
- dst->mm.pages->sgl, dst->cache_level,
+ dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
@@ -254,7 +455,7 @@ static int __migrate_clear(struct intel_migrate *migrate,
{
return intel_migrate_clear(migrate, ww, NULL,
obj->mm.pages->sgl,
- obj->cache_level,
+ obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
@@ -267,7 +468,7 @@ static int __global_clear(struct intel_migrate *migrate,
{
return intel_context_migrate_clear(migrate->context, NULL,
obj->mm.pages->sgl,
- obj->cache_level,
+ obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
@@ -286,7 +487,8 @@ global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
static int live_migrate_copy(void *arg)
{
- struct intel_migrate *migrate = arg;
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
struct drm_i915_private *i915 = migrate->context->engine->i915;
I915_RND_STATE(prng);
int i;
@@ -307,7 +509,8 @@ static int live_migrate_copy(void *arg)
static int live_migrate_clear(void *arg)
{
- struct intel_migrate *migrate = arg;
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
struct drm_i915_private *i915 = migrate->context->engine->i915;
I915_RND_STATE(prng);
int i;
@@ -327,6 +530,149 @@ static int live_migrate_clear(void *arg)
return 0;
}
+struct spinner_timer {
+ struct timer_list timer;
+ struct igt_spinner spin;
+};
+
+static void spinner_kill(struct timer_list *timer)
+{
+ struct spinner_timer *st = timer_container_of(st, timer, timer);
+
+ igt_spinner_end(&st->spin);
+ pr_info("%s\n", __func__);
+}
+
+static int live_emit_pte_full_ring(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
+ struct drm_i915_private *i915 = migrate->context->engine->i915;
+ struct drm_i915_gem_object *obj;
+ struct intel_context *ce;
+ struct i915_request *rq, *prev;
+ struct spinner_timer st;
+ struct sgt_dma it;
+ int len, sz, err;
+ u32 *cs;
+
+ /*
+ * Simple regression test to check that we don't trample the
+ * rq->reserved_space when returning from emit_pte(), if the ring is
+ * nearly full.
+ */
+
+ if (igt_spinner_init(&st.spin, to_gt(i915)))
+ return -ENOMEM;
+
+ obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_spinner;
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err)
+ goto out_obj;
+
+ ce = intel_migrate_create_context(migrate);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out_obj;
+ }
+
+ ce->ring_size = SZ_4K; /* Not too big */
+
+ err = intel_context_pin(ce);
+ if (err)
+ goto out_put;
+
+ rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unpin;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&st.spin, rq)) {
+ err = -EIO;
+ goto out_unpin;
+ }
+
+ /*
+ * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
+ * ring->reserved_space at the end. To actually emit the PTEs we require
+ * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
+ * greater than PAGE_SIZE. The correct behaviour is to wait for more
+ * ring space in emit_pte(), otherwise we trample on the reserved_space
+ * resulting in crashes when later submitting the rq.
+ */
+
+ prev = NULL;
+ do {
+ if (prev)
+ i915_request_add(rq);
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unpin;
+ }
+
+ sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
+ I915_EMIT_PTE_NUM_DWORDS;
+ sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
+ I915_EMIT_PTE_NUM_DWORDS);
+ cs = intel_ring_begin(rq, sz);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out_rq;
+ }
+
+ memset32(cs, MI_NOOP, sz);
+ cs += sz;
+ intel_ring_advance(rq, cs);
+
+ pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);
+
+ prev = rq;
+ } while (rq->ring->space > (rq->reserved_space +
+ I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));
+
+ timer_setup_on_stack(&st.timer, spinner_kill, 0);
+ mod_timer(&st.timer, jiffies + 2 * HZ);
+
+ /*
+ * This should wait for the spinner to be killed, otherwise we should go
+ * down in flames when doing i915_request_add().
+ */
+ pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
+ it = sg_sgt(obj->mm.pages->sgl);
+ len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
+ if (!len) {
+ err = -EINVAL;
+ goto out_rq;
+ }
+ if (len < 0) {
+ err = len;
+ goto out_rq;
+ }
+
+out_rq:
+ i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
+ timer_delete_sync(&st.timer);
+ timer_destroy_on_stack(&st.timer);
+out_unpin:
+ intel_context_unpin(ce);
+out_put:
+ intel_context_put(ce);
+out_obj:
+ i915_gem_object_put(obj);
+out_spinner:
+ igt_spinner_fini(&st.spin);
+ return err;
+}
+
struct threaded_migrate {
struct intel_migrate *migrate;
struct task_struct *tsk;
@@ -364,7 +710,14 @@ static int threaded_migrate(struct intel_migrate *migrate,
thread[i].tsk = tsk;
}
- msleep(10); /* start all threads before we kthread_stop() */
+ /*
+ * Start all threads before we kthread_stop().
+ * In CHV / BXT+VTD environments, where VMA pinning is committed
+ * asynchronously, empirically determined 100ms delay is needed
+ * to avoid stopping threads that may still wait for completion of
+ * intel_ggtt_bind_vma and fail with -ERESTARTSYS when interrupted.
+ */
+ msleep((intel_vm_no_concurrent_access_wa(migrate->context->vm->i915) ? 100 : 10) * n_cpus);
for (i = 0; i < n_cpus; ++i) {
struct task_struct *tsk = thread[i].tsk;
@@ -373,11 +726,9 @@ static int threaded_migrate(struct intel_migrate *migrate,
if (IS_ERR_OR_NULL(tsk))
continue;
- status = kthread_stop(tsk);
+ status = kthread_stop_put(tsk);
if (status && !err)
err = status;
-
- put_task_struct(tsk);
}
kfree(thread);
@@ -393,7 +744,10 @@ static int __thread_migrate_copy(void *arg)
static int thread_migrate_copy(void *arg)
{
- return threaded_migrate(arg, __thread_migrate_copy, 0);
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
+
+ return threaded_migrate(migrate, __thread_migrate_copy, 0);
}
static int __thread_global_copy(void *arg)
@@ -405,7 +759,10 @@ static int __thread_global_copy(void *arg)
static int thread_global_copy(void *arg)
{
- return threaded_migrate(arg, __thread_global_copy, 0);
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
+
+ return threaded_migrate(migrate, __thread_global_copy, 0);
}
static int __thread_migrate_clear(void *arg)
@@ -424,12 +781,18 @@ static int __thread_global_clear(void *arg)
static int thread_migrate_clear(void *arg)
{
- return threaded_migrate(arg, __thread_migrate_clear, 0);
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
+
+ return threaded_migrate(migrate, __thread_migrate_clear, 0);
}
static int thread_global_clear(void *arg)
{
- return threaded_migrate(arg, __thread_global_clear, 0);
+ struct intel_gt *gt = arg;
+ struct intel_migrate *migrate = &gt->migrate;
+
+ return threaded_migrate(migrate, __thread_global_clear, 0);
}
int intel_migrate_live_selftests(struct drm_i915_private *i915)
@@ -437,6 +800,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(live_migrate_copy),
SUBTEST(live_migrate_clear),
+ SUBTEST(live_emit_pte_full_ring),
SUBTEST(thread_migrate_copy),
SUBTEST(thread_migrate_clear),
SUBTEST(thread_global_copy),
@@ -447,7 +811,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915)
if (!gt->migrate.context)
return 0;
- return i915_subtests(tests, &gt->migrate);
+ return intel_gt_live_subtests(tests, gt);
}
static struct drm_i915_gem_object *
@@ -485,7 +849,7 @@ static int wrap_ktime_compare(const void *A, const void *B)
static int __perf_clear_blt(struct intel_context *ce,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
size_t sz)
{
@@ -499,7 +863,7 @@ static int __perf_clear_blt(struct intel_context *ce,
t0 = ktime_get();
- err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
+ err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
is_lmem, 0, &rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
@@ -545,7 +909,8 @@ static int perf_clear_blt(void *arg)
err = __perf_clear_blt(gt->migrate.context,
dst->mm.pages->sgl,
- I915_CACHE_NONE,
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sizes[i]);
@@ -560,10 +925,10 @@ static int perf_clear_blt(void *arg)
static int __perf_copy_blt(struct intel_context *ce,
struct scatterlist *src,
- enum i915_cache_level src_cache_level,
+ unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
- enum i915_cache_level dst_cache_level,
+ unsigned int dst_pat_index,
bool dst_is_lmem,
size_t sz)
{
@@ -578,9 +943,9 @@ static int __perf_copy_blt(struct intel_context *ce,
t0 = ktime_get();
err = intel_context_migrate_copy(ce, NULL,
- src, src_cache_level,
+ src, src_pat_index,
src_is_lmem,
- dst, dst_cache_level,
+ dst, dst_pat_index,
dst_is_lmem,
&rq);
if (rq) {
@@ -619,13 +984,15 @@ static int perf_copy_blt(void *arg)
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
struct drm_i915_gem_object *src, *dst;
+ size_t sz;
int err;
src = create_init_lmem_internal(gt, sizes[i], true);
if (IS_ERR(src))
return PTR_ERR(src);
- dst = create_init_lmem_internal(gt, sizes[i], false);
+ sz = src->base.size;
+ dst = create_init_lmem_internal(gt, sz, false);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err_src;
@@ -633,12 +1000,14 @@ static int perf_copy_blt(void *arg)
err = __perf_copy_blt(gt->migrate.context,
src->mm.pages->sgl,
- I915_CACHE_NONE,
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl,
- I915_CACHE_NONE,
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
- sizes[i]);
+ sz);
i915_gem_object_unlock(dst);
i915_gem_object_put(dst);