diff options
author | Matthew Auld <matthew.auld@intel.com> | 2022-10-04 12:49:14 +0100 |
---|---|---|
committer | Matthew Auld <matthew.auld@intel.com> | 2022-10-14 18:08:04 +0100 |
commit | 8133a6daad4e72748e239a02775a853ca7ed798b (patch) | |
tree | 6a905495b329a2e381da7245bfe71a828acedc9d /drivers/gpu/drm/i915/gem/selftests/huge_pages.c | |
parent | e55427b46852f11ca37f33abb7d7ec76bb4c9ed3 (diff) |
drm/i915: enable PS64 support for DG2
It turns out that on production DG2/ATS HW we should have support for
PS64. This feature allows to provide a 64K TLB hint at the PTE level,
which is a lot more flexible than the current method of enabling 64K GTT
pages for the entire page-table, since that leads to all kinds of
annoying restrictions, as documented in:
commit caa574ffc4aaf4f29b890223878c63e2e7772f62
Author: Matthew Auld <matthew.auld@intel.com>
Date: Sat Feb 19 00:17:49 2022 +0530
drm/i915/uapi: document behaviour for DG2 64K support
On discrete platforms like DG2, we need to support a minimum page size
of 64K when dealing with device local-memory. This is quite tricky for
various reasons, so try to document the new implicit uapi for this.
With PS64, we can now drop the 2M GTT alignment restriction, and instead
only require 64K or larger when dealing with lmem. We still use the
compact-pt layout when possible, but only when we are certain that this
doesn't interfere with userspace.
Note that this is a change in uAPI behaviour, but hopefully shouldn't be
a concern (IGT is at least able to autodetect the alignment), since we
are only making the GTT alignment constraint less restrictive.
Based on a patch from CQ Tang.
v2: update the comment wrt scratch page
v3: (Nirmoy)
- Fix the selftest to actually use the random size, plus some comment
improvements, also drop the rem stuff.
Reported-by: Michal Mrozek <michal.mrozek@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Yang A Shi <yang.a.shi@intel.com>
Cc: Nirmoy Das <nirmoy.das@intel.com>
Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Acked-by: Michal Mrozek <michal.mrozek@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221004114915.221708-1-matthew.auld@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem/selftests/huge_pages.c')
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 157 |
1 files changed, 156 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c570cf780079..0cb99e75b0bc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915, GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); size = obj->base.size; - if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + !HAS_64K_PAGES(i915)) size = round_up(size, I915_GTT_PAGE_SIZE_2M); n = 0; @@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915, * size and ensure the vma offset is at the start of the pt * boundary, however to improve coverage we opt for testing both * aligned and unaligned offsets. + * + * With PS64 this is no longer the case, but to ensure we + * sometimes get the compact layout for smaller objects, apply + * the round_up anyway. */ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) offset_low = round_down(offset_low, @@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg) { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + { SZ_2M + SZ_64K, SZ_64K }, }; int i, j; int err; @@ -1540,6 +1546,154 @@ out_put: return err; } +static int igt_ppgtt_mixed(void *arg) +{ + struct drm_i915_private *i915 = arg; + const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct file *file; + I915_RND_STATE(prng); + LIST_HEAD(objects); + struct intel_memory_region *mr; + struct i915_vma *vma; + unsigned int count; + u32 i, addr; + int *order; + int n, err; + + /* + * Sanity check mixing 4K and 64K pages within the same page-table via + * the new PS64 TLB hint. + */ + + if (!HAS_64K_PAGES(i915)) { + pr_info("device lacks PS64, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + + i = 0; + addr = 0; + do { + u32 sz; + + sz = i915_prandom_u32_max_state(SZ_4M, &prng); + sz = max_t(u32, sz, SZ_4K); + + mr = i915->mm.regions[INTEL_REGION_LMEM_0]; + if (i & 1) + mr = i915->mm.regions[INTEL_REGION_SMEM]; + + obj = i915_gem_object_create_region(mr, sz, 0, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } + + list_add_tail(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_put; + } + + addr = round_up(addr, mr->min_page_size); + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) + goto err_put; + + if (mr->type == INTEL_MEMORY_LOCAL && + (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) { + err = -EINVAL; + goto err_put; + } + + addr += obj->base.size; + i++; + } while (addr <= SZ_16M); + + n = 0; + count = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + n++; + } + i915_gem_context_unlock_engines(ctx); + if (!n) + goto err_put; + + order = i915_random_order(count * count, &prng); + if (!order) { + err = -ENOMEM; + goto err_put; + } + + i = 0; + addr = 0; + engines = i915_gem_context_lock_engines(ctx); + list_for_each_entry(obj, &objects, st_link) { + u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng); + + addr = round_up(addr, obj->mm.region->min_page_size); + + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + offset_in_page(rnd) / sizeof(u32), rnd + 1); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + (PAGE_SIZE / sizeof(u32)) - 1, + rnd + 2); + if (err) + break; + + addr += obj->base.size; + + cond_resched(); + } + + i915_gem_context_unlock_engines(ctx); + kfree(order); +err_put: + list_for_each_entry_safe(obj, on, &objects, st_link) { + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +out_vm: + i915_vm_put(vm); +out: + fput(file); + return err; +} + static int igt_tmpfs_fallback(void *arg) { struct drm_i915_private *i915 = arg; @@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), SUBTEST(igt_ppgtt_compact), + SUBTEST(igt_ppgtt_mixed), }; if (!HAS_PPGTT(i915)) { |