author | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2022-01-31 13:19:33 -0500 |
---|---|---|
committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2022-01-31 13:19:33 -0500 |
commit | 063565aca3734de4e73639a0e460a58d9418b3cd (patch) | |
tree | fb2455b984f584a819defe6e5fe512a4b6fc33ae /drivers/gpu/drm/i915/gem | |
parent | 14683babf8ee356a232ee76b0acd332aef51fdc4 (diff) | |
parent | 26291c54e111ff6ba87a164d85d4a4e134b7315c (diff) | |
Merge drm/drm-next into drm-intel-next
Catch up with 5.17-rc2 and try to align with drm-intel-gt-next
for a possible topic branch for merging the split of i915_regs...
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
33 files changed, 1987 insertions, 737 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index f0435c6feb68..8a248003dfae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct clflush *clflush; assert_object_held(obj); + if (IS_DGFX(i915)) { + WARN_ON_ONCE(obj->cache_dirty); + return false; + } + /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. @@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), + i915_fence_timeout(i915), I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); + /* + * We must have successfully populated the pages(since we are + * holding a pin on the pages as per the flush worker) to reach + * this point, which must mean we have already done the required + * flush-on-acquire, hence resetting cache_dirty here should be + * safe. + */ + obj->cache_dirty = false; } else if (obj->mm.pages) { __do_clflush(obj); + obj->cache_dirty = false; } else { GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } - obj->cache_dirty = false; return true; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ebd775cb1661..00327b750fbb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -237,7 +237,7 @@ static int proto_context_set_persistence(struct drm_i915_private *i915, * colateral damage, and we should not pretend we can by * exposing the interface. 
*/ - if (!intel_has_reset_engine(&i915->gt)) + if (!intel_has_reset_engine(to_gt(i915))) return -ENODEV; pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); @@ -254,7 +254,7 @@ static int proto_context_set_protected(struct drm_i915_private *i915, if (!protected) { pc->uses_protected_content = false; - } else if (!intel_pxp_is_enabled(&i915->gt.pxp)) { + } else if (!intel_pxp_is_enabled(&to_gt(i915)->pxp)) { ret = -ENODEV; } else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) || !(pc->user_flags & BIT(UCONTEXT_BANNABLE))) { @@ -268,8 +268,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915, */ pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (!intel_pxp_is_active(&i915->gt.pxp)) - ret = intel_pxp_start(&i915->gt.pxp); + if (!intel_pxp_is_active(&to_gt(i915)->pxp)) + ret = intel_pxp_start(&to_gt(i915)->pxp); } return ret; @@ -564,6 +564,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, container_of_user(base, typeof(*ext), base); const struct set_proto_ctx_engines *set = data; struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance prev_engine; u64 flags; int err = 0, n, i, j; u16 slot, width, num_siblings; @@ -571,7 +572,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, intel_engine_mask_t prev_mask; /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) + if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc))) return -ENODEV; if (get_user(slot, &ext->engine_index)) @@ -629,7 +630,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, /* Create contexts / engines */ for (i = 0; i < width; ++i) { intel_engine_mask_t current_mask = 0; - struct i915_engine_class_instance prev_engine; for (j = 0; j < num_siblings; ++j) { struct i915_engine_class_instance ci; @@ -833,7 +833,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, sseu = &pc->legacy_rcs_sseu; } - ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu); + ret = i915_gem_user_to_context_sseu(to_gt(i915), &user_sseu, sseu); if (ret) return ret; @@ -1001,7 +1001,7 @@ static void free_engines_rcu(struct rcu_head *rcu) free_engines(engines); } -static int __i915_sw_fence_call +static int engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_gem_engines *engines = @@ -1044,7 +1044,7 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, struct intel_sseu rcs_sseu) { - const struct intel_gt *gt = &ctx->i915->gt; + const struct intel_gt *gt = to_gt(ctx->i915); struct intel_engine_cs *engine; struct i915_gem_engines *e, *err; enum intel_engine_id id; @@ -1521,7 +1521,7 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) * colateral damage, and we should not pretend we can by * exposing the interface. 
*/ - if (!intel_has_reset_engine(&ctx->i915->gt)) + if (!intel_has_reset_engine(to_gt(ctx->i915))) return -ENODEV; i915_gem_context_clear_persistence(ctx); @@ -1559,7 +1559,7 @@ i915_gem_create_context(struct drm_i915_private *i915, } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(&i915->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(i915), 0); if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -1742,7 +1742,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - ppgtt = i915_ppgtt_create(&i915->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(i915), 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -2194,7 +2194,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) return -EINVAL; - ret = intel_gt_terminally_wedged(&i915->gt); + ret = intel_gt_terminally_wedged(to_gt(i915)); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 8955d6abcef1..9402d4bf4ffc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -379,7 +379,7 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data if (ext.flags) return -EINVAL; - if (!intel_pxp_is_enabled(&ext_data->i915->gt.pxp)) + if (!intel_pxp_is_enabled(&to_gt(ext_data->i915)->pxp)) return -ENODEV; ext_data->flags |= I915_BO_PROTECTED; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index e8a58c997170..1b526039a60d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -248,8 +248,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - /* XXX: consider doing a vmap flush or something */ - if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj)) + /* + * DG1 is special here since it still snoops transactions even with + * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We + * might need to revisit this as we add new discrete platforms. + * + * XXX: Consider doing a vmap flush or something, where possible. + * Currently we just do a heavy handed wbinvd_on_all_cpus() here since + * the underlying sg_table might not even point to struct pages, so we + * can't just call drm_clflush_sg or similar, like we do elsewhere in + * the driver. 
+ */ + if (i915_gem_object_can_bypass_llc(obj) || + (!HAS_LLC(i915) && !IS_DG1(i915))) wbinvd_on_all_cpus(); sg_page_sizes = i915_sg_dma_sizes(pages->sgl); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index b684a62bf3b0..26532c07d467 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -18,10 +18,32 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (IS_DGFX(i915)) + return false; + return !(obj->cache_level == I915_CACHE_NONE || obj->cache_level == I915_CACHE_WT); } +bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (obj->cache_dirty) + return false; + + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + return true; + + if (IS_DGFX(i915)) + return false; + + /* Currently in use by HW (display engine)? Keep flushed. */ + return i915_gem_object_is_framebuffer(obj); +} + static void flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1ff1b76d5206..355a7b68fdac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -31,6 +31,7 @@ #include "i915_gem_ioctls.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_vma_snapshot.h" struct eb_vma { struct i915_vma *vma; @@ -309,11 +310,15 @@ struct i915_execbuffer { struct eb_fence *fences; unsigned long num_fences; +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1]; +#endif }; static int eb_parse(struct i915_execbuffer *eb); static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); +static void eb_capture_release(struct i915_execbuffer *eb); static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { @@ -992,7 +997,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } if (!(ev->flags & EXEC_OBJECT_WRITE)) { - err = dma_resv_reserve_shared(vma->resv, 1); + err = dma_resv_reserve_shared(vma->obj->base.resv, 1); if (err) return err; } @@ -1045,6 +1050,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final) i915_vma_put(vma); } + eb_capture_release(eb); eb_unpin_engine(eb); } @@ -1094,6 +1100,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } +static void reloc_cache_unmap(struct reloc_cache *cache) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) + kunmap_atomic(vaddr); + else + io_mapping_unmap_atomic((void __iomem *)vaddr); +} + +static void reloc_cache_remap(struct reloc_cache *cache, + struct drm_i915_gem_object *obj) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + if (cache->vaddr & KMAP) { + struct page *page = i915_gem_object_get_page(obj, cache->page); + + vaddr = kmap_atomic(page); + cache->vaddr = unmask_flags(cache->vaddr) | + (unsigned long)vaddr; + } else { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + + offset = cache->node.start; + if (!drm_mm_node_allocated(&cache->node)) + offset += cache->page << PAGE_SHIFT; + + cache->vaddr = (unsigned long) + io_mapping_map_atomic_wc(&ggtt->iomap, offset); + } 
+} + static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; @@ -1358,10 +1405,17 @@ eb_relocate_entry(struct i915_execbuffer *eb, * batchbuffers. */ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && - GRAPHICS_VER(eb->i915) == 6) { + GRAPHICS_VER(eb->i915) == 6 && + !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) { + struct i915_vma *vma = target->vma; + + reloc_cache_unmap(&eb->reloc_cache); + mutex_lock(&vma->vm->mutex); err = i915_vma_bind(target->vma, target->vma->obj->cache_level, PIN_GLOBAL, NULL); + mutex_unlock(&vma->vm->mutex); + reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); if (err) return err; } @@ -1882,36 +1936,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb) return NULL; } -static int eb_move_to_gpu(struct i915_execbuffer *eb) +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ +static void eb_capture_stage(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; - unsigned int i = count; - int err = 0, j; + unsigned int i = count, j; + struct i915_vma_snapshot *vsnap; while (i--) { struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; unsigned int flags = ev->flags; - struct drm_i915_gem_object *obj = vma->obj; - assert_vma_held(vma); + if (!(flags & EXEC_OBJECT_CAPTURE)) + continue; - if (flags & EXEC_OBJECT_CAPTURE) { + vsnap = i915_vma_snapshot_alloc(GFP_KERNEL); + if (!vsnap) + continue; + + i915_vma_snapshot_init(vsnap, vma, "user"); + for_each_batch_create_order(eb, j) { struct i915_capture_list *capture; - for_each_batch_create_order(eb, j) { - if (!eb->requests[j]) - break; + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (!capture) + continue; - capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (capture) { - capture->next = - eb->requests[j]->capture_list; - capture->vma = vma; - eb->requests[j]->capture_list = capture; - } - } + capture->next = eb->capture_lists[j]; + capture->vma_snapshot = i915_vma_snapshot_get(vsnap); + eb->capture_lists[j] = capture; + } + i915_vma_snapshot_put(vsnap); + } +} + +/* Commit once we're in the critical path */ +static void eb_capture_commit(struct i915_execbuffer *eb) +{ + unsigned int j; + + for_each_batch_create_order(eb, j) { + struct i915_request *rq = eb->requests[j]; + + if (!rq) + break; + + rq->capture_list = eb->capture_lists[j]; + eb->capture_lists[j] = NULL; + } +} + +/* + * Release anything that didn't get committed due to errors. + * The capture_list will otherwise be freed at request retire. 
+ */ +static void eb_capture_release(struct i915_execbuffer *eb) +{ + unsigned int j; + + for_each_batch_create_order(eb, j) { + if (eb->capture_lists[j]) { + i915_request_free_capture_list(eb->capture_lists[j]); + eb->capture_lists[j] = NULL; } + } +} + +static void eb_capture_list_clear(struct i915_execbuffer *eb) +{ + memset(eb->capture_lists, 0, sizeof(eb->capture_lists)); +} + +#else + +static void eb_capture_stage(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_commit(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_release(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_list_clear(struct i915_execbuffer *eb) +{ +} + +#endif + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i = count; + int err = 0, j; + + while (i--) { + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + unsigned int flags = ev->flags; + struct drm_i915_gem_object *obj = vma->obj; + + assert_vma_held(vma); /* * If the GPU is not _reading_ through the CPU cache, we need @@ -1992,6 +2123,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) /* Unconditionally flush any chipset caches (for streaming writes). */ intel_gt_chipset_flush(eb->gt); + eb_capture_commit(eb); + return 0; err_skip: @@ -2166,7 +2299,7 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = dma_resv_reserve_shared(shadow->resv, 1); + err = dma_resv_reserve_shared(shadow->obj->base.resv, 1); if (err) goto err_trampoline; @@ -2278,9 +2411,9 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } -static int num_vcs_engines(const struct drm_i915_private *i915) +static int num_vcs_engines(struct drm_i915_private *i915) { - return hweight_long(VDBOX_MASK(&i915->gt)); + return hweight_long(VDBOX_MASK(to_gt(i915))); } /* @@ -3019,7 +3152,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno, + eb->context->parallel.seqno++, false); if (!fence_array) { kfree(fences); @@ -3116,7 +3249,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, /* Allocate a request for this batch buffer nice and early. */ eb->requests[i] = i915_request_create(eb_find_context(eb, i)); if (IS_ERR(eb->requests[i])) { - out_fence = ERR_PTR(PTR_ERR(eb->requests[i])); + out_fence = ERR_CAST(eb->requests[i]); eb->requests[i] = NULL; return out_fence; } @@ -3134,13 +3267,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, } /* - * Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when - * this request is retired will the batch_obj be moved onto - * the inactive_list and lose its active reference. Hence we do - * not need to explicitly hold another reference here. + * Not really on stack, but we don't want to call + * kfree on the batch_snapshot when we put it, so use the + * _onstack interface. 
*/ - eb->requests[i]->batch = eb->batches[i]->vma; + if (eb->batches[i]->vma) + i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot, + eb->batches[i]->vma, + "batch"); if (eb->batch_pool) { GEM_BUG_ON(intel_context_is_parallel(eb->context)); intel_gt_buffer_pool_mark_active(eb->batch_pool, @@ -3189,6 +3323,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.fences = NULL; eb.num_fences = 0; + eb_capture_list_clear(&eb); + memset(eb.requests, 0, sizeof(struct i915_request *) * ARRAY_SIZE(eb.requests)); eb.composite_fence = NULL; @@ -3275,10 +3411,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, } ww_acquire_done(&eb.ww.ctx); + eb_capture_stage(&eb); out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); if (IS_ERR(out_fence)) { err = PTR_ERR(out_fence); + out_fence = NULL; if (eb.requests[0]) goto err_request; else diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index a57a6b7013c2..c5150a1ee3d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { .put_pages = i915_gem_object_put_pages_internal, }; -/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) +__i915_gem_object_create_internal(struct drm_i915_private *i915, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; @@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + i915_gem_object_init(obj, ops, &lock_class, 0); obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* @@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return obj; } + +/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. 
This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) +{ + return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 39bb15eafc07..1478c02a82cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -73,7 +73,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (args->flags & ~(I915_MMAP_WC)) return -EINVAL; - if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) + if (args->flags & I915_MMAP_WC && !pat_enabled()) return -ENODEV; obj = i915_gem_object_lookup(file, args->handle); @@ -538,6 +538,9 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) { struct i915_mmap_offset *mmo, *mn; + if (obj->ops->unmap_virtual) + obj->ops->unmap_virtual(obj); + spin_lock(&obj->mmo.lock); rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) { @@ -646,7 +649,7 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT, NULL); if (err) goto err; @@ -737,7 +740,7 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, if (HAS_LMEM(to_i915(dev))) mmap_type = I915_MMAP_TYPE_FIXED; - else if (boot_cpu_has(X86_FEATURE_PAT)) + else if (pat_enabled()) mmap_type = I915_MMAP_TYPE_WC; else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) return -ENODEV; @@ -793,7 +796,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, break; case I915_MMAP_OFFSET_WC: - if (!boot_cpu_has(X86_FEATURE_PAT)) + if (!pat_enabled()) return -ENODEV; type = I915_MMAP_TYPE_WC; break; @@ -803,7 +806,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, break; case I915_MMAP_OFFSET_UC: - if (!boot_cpu_has(X86_FEATURE_PAT)) + if (!pat_enabled()) return -ENODEV; type = I915_MMAP_TYPE_UC; break; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1e426a42a36c..d87b508b59b1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -31,6 +31,7 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" +#include "i915_gem_ttm.h" #include "i915_memcpy.h" #include "i915_trace.h" @@ -91,7 +92,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, } /** - * i915_gem_object_fini - Clean up a GEM object initialization + * __i915_gem_object_fini - Clean up a GEM object initialization * @obj: The gem object to cleanup * * This function cleans up gem object fields that are set up by @@ -107,25 +108,29 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj) } /** - * Mark up the object's coherency levels for a given cache_level + * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels + * for a given cache_level * @obj: #drm_i915_gem_object * @cache_level: cache level */ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, 
unsigned int cache_level) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + obj->cache_level = cache_level; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | I915_BO_CACHE_COHERENT_FOR_WRITE); - else if (HAS_LLC(to_i915(obj->base.dev))) + else if (HAS_LLC(i915)) obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ; else obj->cache_coherent = 0; obj->cache_dirty = - !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) && + !IS_DGFX(i915); } bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) @@ -257,6 +262,8 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) */ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) { + assert_object_held(obj); + if (!list_empty(&obj->vma.list)) { struct i915_vma *vma; @@ -323,7 +330,16 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, obj->ops->delayed_free(obj); continue; } + + if (!i915_gem_object_trylock(obj, NULL)) { + /* busy, toss it back to the pile */ + if (llist_add(&obj->freed, &i915->mm.free_list)) + queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10)); + continue; + } + __i915_gem_object_pages_fini(obj); + i915_gem_object_unlock(obj); __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ @@ -343,7 +359,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915) static void __i915_gem_free_work(struct work_struct *work) { struct drm_i915_private *i915 = - container_of(work, struct drm_i915_private, mm.free_work); + container_of(work, struct drm_i915_private, mm.free_work.work); i915_gem_flush_free_objects(i915); } @@ -364,15 +380,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) atomic_inc(&i915->mm.free_count); /* - * This serializes freeing with the shrinker. Since the free - * is delayed, first by RCU then by the workqueue, we want the - * shrinker to be able to free pages of unreferenced objects, - * or else we may oom whilst there are plenty of deferred - * freed objects. - */ - i915_gem_object_make_unshrinkable(obj); - - /* * Since we require blocking on struct_mutex to unbind the freed * object from the GPU before releasing resources back to the * system, we can not do that directly from the RCU callback (which may @@ -384,7 +391,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) */ if (llist_add(&obj->freed, &i915->mm.free_list)) - queue_work(i915->wq, &i915->mm.free_work); + queue_delayed_work(i915->wq, &i915->mm.free_work, 0); } void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, @@ -456,7 +463,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset * from can't cross a page boundary. The caller must ensure that @obj pages * are pinned and that @obj is synced wrt. any related writes. * - * Returns 0 on success or -ENODEV if the type of @obj's backing store is + * Return: %0 on success or -ENODEV if the type of @obj's backing store is * unsupported. 
*/ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) @@ -709,7 +716,7 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, void i915_gem_init__objects(struct drm_i915_private *i915) { - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work); } void i915_objects_module_exit(void) @@ -732,6 +739,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { .export = i915_gem_prime_export, }; +/** + * i915_gem_object_get_moving_fence - Get the object's moving fence if any + * @obj: The object whose moving fence to get. + * + * A non-signaled moving fence means that there is an async operation + * pending on the object that needs to be waited on before setting up + * any GPU- or CPU PTEs to the object's pages. + * + * Return: A refcounted pointer to the object's moving fence if any, + * NULL otherwise. + */ +struct dma_fence * +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +{ + return dma_fence_get(i915_gem_to_ttm(obj)->moving); +} + +/** + * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any + * @obj: The object whose moving fence to wait for. + * @intr: Whether to wait interruptible. + * + * If the moving fence signaled without an error, it is detached from the + * object and put. + * + * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted, + * negative error code if the async operation represented by the + * moving fence failed. + */ +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, + bool intr) +{ + struct dma_fence *fence = i915_gem_to_ttm(obj)->moving; + int ret; + + assert_object_held(obj); + if (!fence) + return 0; + + ret = dma_fence_wait(fence, intr); + if (ret) + return ret; + + if (fence->error) + return fence->error; + + i915_gem_to_ttm(obj)->moving = NULL; + dma_fence_put(fence); + return 0; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 59201801cec5..f66d46882ea7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915); struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj); -void i915_gem_object_truncate(struct drm_i915_gem_object *obj); /** * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle @@ -211,9 +210,13 @@ static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object return __i915_gem_object_lock(obj, ww, true); } -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_trylock(obj->base.resv); + if (!ww) + return dma_resv_trylock(obj->base.resv); + else + return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) @@ -296,6 +299,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST); +} + +static inline bool i915_gem_object_is_proxy(const struct 
drm_i915_gem_object *obj) { return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); @@ -449,7 +458,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) } int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); -void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +int i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); /** @@ -512,11 +521,18 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } +struct dma_fence * +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); + +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, + bool intr); + void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); +bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); @@ -533,25 +549,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); -static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - if (obj->cache_dirty) - return false; - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - return true; - - /* Currently in use by HW (display engine)? Keep flushed. 
*/ - return i915_gem_object_is_framebuffer(obj); -} - static inline void __start_cpu_write(struct drm_i915_gem_object *obj) { obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; - if (cpu_write_needs_clflush(obj)) + if (i915_gem_cpu_write_needs_clflush(obj)) obj->cache_dirty = true; } @@ -613,6 +619,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, enum intel_memory_type type); +int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment); +void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup); +void __shmem_writeback(size_t size, struct address_space *mapping); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index da85169006d4..0dd107dcecc2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -34,9 +34,11 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) -#define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_NO_MMAP BIT(3) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +/* Skip the shrinker management in set_pages/unset_pages */ +#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2) +#define I915_GEM_OBJECT_IS_PROXY BIT(3) +#define I915_GEM_OBJECT_NO_MMAP BIT(4) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -54,14 +56,18 @@ struct drm_i915_gem_object_ops { int (*get_pages)(struct drm_i915_gem_object *obj); void (*put_pages)(struct drm_i915_gem_object *obj, struct sg_table *pages); - void (*truncate)(struct drm_i915_gem_object *obj); + int (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*shrinker_release_pages)(struct drm_i915_gem_object *obj, + bool no_gpu_wait, + bool should_writeback); int (*pread)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); u64 (*mmap_offset)(struct drm_i915_gem_object *obj); + void (*unmap_virtual)(struct drm_i915_gem_object *obj); int (*dmabuf_export)(struct drm_i915_gem_object *obj); @@ -305,6 +311,7 @@ struct drm_i915_gem_object { #define I915_BO_READONLY BIT(6) #define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */ #define I915_BO_PROTECTED BIT(8) +#define I915_BO_WAS_BOUND_BIT 9 /** * @mem_flags - Mutable placement-related flags * @@ -486,9 +493,37 @@ struct drm_i915_gem_object { * instead go through the pin/unpin interfaces. */ atomic_t pages_pin_count; + + /** + * @shrink_pin: Prevents the pages from being made visible to + * the shrinker, while the shrink_pin is non-zero. Most users + * should pretty much never have to care about this, outside of + * some special use cases. + * + * By default most objects will start out as visible to the + * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the + * backing pages are attached to the object, like in + * __i915_gem_object_set_pages(). They will then be removed the + * shrinker list once the pages are released. 
+ * + * The @shrink_pin is incremented by calling + * i915_gem_object_make_unshrinkable(), which will also remove + * the object from the shrinker list, if the pin count was zero. + * + * Callers will then typically call + * i915_gem_object_make_shrinkable() or + * i915_gem_object_make_purgeable() to decrement the pin count, + * and make the pages visible again. + */ atomic_t shrink_pin; /** + * @ttm_shrinkable: True when the object is using shmem pages + * underneath. Protected by the object lock. + */ + bool ttm_shrinkable; + + /** * Priority list of potential placements for this object. */ struct intel_memory_region **placements; @@ -512,6 +547,7 @@ struct drm_i915_gem_object { */ struct list_head region_link; + struct i915_refct_sgt *rsgt; struct sg_table *pages; void *mapping; @@ -547,7 +583,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_dma_page; /** - * Element within i915->mm.unbound_list or i915->mm.bound_list, + * Element within i915->mm.shrink_list or i915->mm.purge_list, * locked by i915->mm.obj_lock. */ struct list_head link; @@ -565,7 +601,7 @@ struct drm_i915_gem_object { } mm; struct { - struct sg_table *cached_io_st; + struct i915_refct_sgt *cached_io_rsgt; struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8eb1c3a6fc9c..a50f884973bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -26,6 +28,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, /* Make the pages coherent with the GPU (flushing any swapin). 
*/ if (obj->cache_dirty) { + WARN_ON_ONCE(IS_DGFX(i915)); obj->write_domain = 0; if (i915_gem_object_has_struct_page(obj)) drm_clflush_sg(pages); @@ -68,7 +71,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, shrinkable = false; } - if (shrinkable) { + if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) { struct list_head *list; unsigned long flags; @@ -158,11 +161,12 @@ retry: } /* Immediately discard the backing storage */ -void i915_gem_object_truncate(struct drm_i915_gem_object *obj) +int i915_gem_object_truncate(struct drm_i915_gem_object *obj) { - drm_gem_free_mmap_offset(&obj->base); if (obj->ops->truncate) - obj->ops->truncate(obj); + return obj->ops->truncate(obj); + + return 0; } /* Try to discard unwanted pages */ @@ -208,7 +212,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) if (i915_gem_object_is_volatile(obj)) obj->mm.madv = I915_MADV_WILLNEED; - i915_gem_object_make_unshrinkable(obj); + if (!i915_gem_object_has_self_managed_shrink_list(obj)) + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { unmap_object(obj, page_mask_bits(obj->mm.mapping)); @@ -218,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(to_gt(i915)); + } + return pages; } @@ -414,8 +427,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } if (!ptr) { - if (GEM_WARN_ON(type == I915_MAP_WC && - !static_cpu_has(X86_FEATURE_PAT))) + err = i915_gem_object_wait_moving_fence(obj, true); + if (err) { + ptr = ERR_PTR(err); + goto err_unpin; + } + + if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled())) ptr = ERR_PTR(-ENODEV); else if (i915_gem_object_has_struct_page(obj)) ptr = i915_gem_object_map_page(obj, type); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 7986612f48fa..ca6faffcc496 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -19,6 +19,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct scatterlist *sg; struct sg_table *st; dma_addr_t dma; @@ -73,7 +74,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) dst += PAGE_SIZE; } - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + intel_gt_chipset_flush(to_gt(i915)); /* We're no longer struct page backed */ obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; @@ -140,6 +141,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, { void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); + struct drm_i915_private *i915 = to_i915(obj->base.dev); int err; err = i915_gem_object_wait(obj, @@ -159,7 +161,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, return -EFAULT; drm_clflush_virt_range(vaddr, args->size); - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + intel_gt_chipset_flush(to_gt(i915)); i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
index 726b40e1fbb0..ac56124760e1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -35,7 +35,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - intel_gt_suspend_prepare(&i915->gt); + intel_gt_suspend_prepare(to_gt(i915)); i915_gem_drain_freed_objects(i915); } @@ -153,7 +153,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ - intel_gt_suspend_late(&i915->gt); + intel_gt_suspend_late(to_gt(i915)); spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { @@ -223,7 +223,7 @@ void i915_gem_resume(struct drm_i915_private *i915) * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(&i915->gt); + intel_gt_resume(to_gt(i915)); ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU); GEM_WARN_ON(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index a016ccec36f3..a4350227e9ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -11,7 +11,7 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, struct intel_memory_region *mem) { - obj->mm.region = intel_memory_region_get(mem); + obj->mm.region = mem; mutex_lock(&mem->objects.lock); list_add(&obj->mm.region_link, &mem->objects.list); @@ -25,8 +25,6 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) mutex_lock(&mem->objects.lock); list_del(&obj->mm.region_link); mutex_unlock(&mem->objects.lock); - - intel_memory_region_put(mem); } struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index d77da59fae04..cc9fe258fba7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec) cond_resched(); } -static int shmem_get_pages(struct drm_i915_gem_object *obj) +void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mem = obj->mm.region; - const unsigned long page_count = obj->base.size / PAGE_SIZE; + struct sgt_iter sgt_iter; + struct pagevec pvec; + struct page *page; + + mapping_clear_unevictable(mapping); + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (dirty) + set_page_dirty(page); + + if (backup) + mark_page_accessed(page); + + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); + + sg_free_table(st); +} + +int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment) +{ + const unsigned long page_count = size / PAGE_SIZE; unsigned long i; - struct address_space *mapping; - struct sg_table *st; struct scatterlist *sg; - struct sgt_iter sgt_iter; struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ - unsigned int max_segment = i915_sg_segment_size(); - unsigned int sg_page_sizes; gfp_t noreclaim; int ret; /* - * Assert that the object is not currently in any GPU domain. 
As it - * wasn't in the GTT, there shouldn't be any way it could have been in - * a GPU cache - */ - GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); - GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); - - /* * If there's no chance of allocating enough pages for the whole * object, bail early. */ - if (obj->base.size > resource_size(&mem->region)) + if (size > resource_size(&mr->region)) return -ENOMEM; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + if (sg_alloc_table(st, page_count, GFP_KERNEL)) return -ENOMEM; -rebuild_st: - if (sg_alloc_table(st, page_count, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - /* * Get the list of pages out of our struct file. They'll be pinned * at this point until we release them. * * Fail silently without starting the shrinker */ - mapping = obj->base.filp->f_mapping; mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; st->nents = 0; - sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, @@ -135,10 +140,9 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) { - sg_page_sizes |= sg->length; + if (i) sg = sg_next(sg); - } + st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -149,14 +153,67 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL); } - if (sg) { /* loop terminated early; short sg table */ - sg_page_sizes |= sg->length; + if (sg) /* loop terminated early; short sg table */ sg_mark_end(sg); - } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); + return 0; +err_sg: + sg_mark_end(sg); + if (sg != st->sgl) { + shmem_sg_free_table(st, mapping, false, false); + } else { + mapping_clear_unevictable(mapping); + sg_free_table(st); + } + + /* + * shmemfs first checks if there is enough memory to allocate the page + * and reports ENOSPC should there be insufficient, along with the usual + * ENOMEM for a genuine allocation failure. + * + * We use ENOSPC in our driver to mean that we have run out of aperture + * space and so want to translate the error from shmemfs back to our + * usual understanding of ENOMEM. + */ + if (ret == -ENOSPC) + ret = -ENOMEM; + + return ret; +} + +static int shmem_get_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; + struct address_space *mapping = obj->base.filp->f_mapping; + const unsigned long page_count = obj->base.size / PAGE_SIZE; + unsigned int max_segment = i915_sg_segment_size(); + struct sg_table *st; + struct sgt_iter sgt_iter; + struct page *page; + int ret; + + /* + * Assert that the object is not currently in any GPU domain. 
As it + * wasn't in the GTT, there shouldn't be any way it could have been in + * a GPU cache + */ + GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + +rebuild_st: + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping, + max_segment); + if (ret) + goto err_st; + ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) { /* @@ -168,6 +225,7 @@ rebuild_st: for_each_sgt_page(page, sgt_iter, st) put_page(page); sg_free_table(st); + kfree(st); max_segment = PAGE_SIZE; goto rebuild_st; @@ -185,28 +243,12 @@ rebuild_st: if (i915_gem_object_can_bypass_llc(obj)) obj->cache_dirty = true; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); return 0; -err_sg: - sg_mark_end(sg); err_pages: - mapping_clear_unevictable(mapping); - if (sg != st->sgl) { - struct pagevec pvec; - - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, st) { - if (!pagevec_add(&pvec, page)) - check_release_pagevec(&pvec); - } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); - } - sg_free_table(st); - kfree(st); - + shmem_sg_free_table(st, mapping, false, false); /* * shmemfs first checks if there is enough memory to allocate the page * and reports ENOSPC should there be insufficient, along with the usual @@ -216,13 +258,16 @@ err_pages: * space and so want to translate the error from shmemfs back to our * usual understanding of ENOMEM. */ +err_st: if (ret == -ENOSPC) ret = -ENOMEM; + kfree(st); + return ret; } -static void +static int shmem_truncate(struct drm_i915_gem_object *obj) { /* @@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj) shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; obj->mm.pages = ERR_PTR(-EFAULT); + + return 0; } -static void -shmem_writeback(struct drm_i915_gem_object *obj) +void __shmem_writeback(size_t size, struct address_space *mapping) { - struct address_space *mapping; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = SWAP_CLUSTER_MAX, @@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj) * instead of invoking writeback so they are aged and paged out * as normal. 
*/ - mapping = obj->base.filp->f_mapping; /* Begin writeback on each dirty page */ - for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + for (i = 0; i < size >> PAGE_SHIFT; i++) { struct page *page; page = find_lock_page(mapping, i); @@ -281,6 +325,12 @@ put: } } +static void +shmem_writeback(struct drm_i915_gem_object *obj) +{ + __shmem_writeback(obj->base.size, obj->base.filp->f_mapping); +} + void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) { - struct sgt_iter sgt_iter; - struct pagevec pvec; - struct page *page; - - GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj, pages); - mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); - - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) - set_page_dirty(page); - - if (obj->mm.madv == I915_MADV_WILLNEED) - mark_page_accessed(page); - - if (!pagevec_add(&pvec, page)) - check_release_pagevec(&pvec); - } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); - obj->mm.dirty = false; - - sg_free_table(pages); + shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping, + obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED); kfree(pages); + obj->mm.dirty = false; } static void @@ -634,9 +664,10 @@ static int init_shmem(struct intel_memory_region *mem) return 0; /* Don't error, we can simply fallback to the kernel mnt */ } -static void release_shmem(struct intel_memory_region *mem) +static int release_shmem(struct intel_memory_region *mem) { i915_gemfs_fini(mem->i915); + return 0; } static const struct intel_memory_region_ops shmem_region_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 5ab136ffdeb2..cc927e49d21f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -15,7 +15,6 @@ #include "gt/intel_gt_requests.h" -#include "dma_resv_utils.h" #include "i915_trace.h" static bool swap_available(void) @@ -37,8 +36,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } -static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, - unsigned long shrink, bool trylock_vm) +static int drop_pages(struct drm_i915_gem_object *obj, + unsigned long shrink, bool trylock_vm) { unsigned long flags; @@ -56,19 +55,25 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, return false; } -static void try_to_writeback(struct drm_i915_gem_object *obj, - unsigned int flags) +static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags) { + if (obj->ops->shrinker_release_pages) + return obj->ops->shrinker_release_pages(obj, + !(flags & I915_SHRINK_ACTIVE), + flags & I915_SHRINK_WRITEBACK); + switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); - return; + return 0; case __I915_MADV_PURGED: - return; + return 0; } if (flags & I915_SHRINK_WRITEBACK) i915_gem_object_writeback(obj); + + return 0; } /** @@ -148,7 +153,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx 
*ww, */ if (shrink & I915_SHRINK_ACTIVE) /* Retire requests to unpin all idle contexts */ - intel_gt_retire_requests(&i915->gt); + intel_gt_retire_requests(to_gt(i915)); /* * As we may completely rewrite the (un)bound list whilst unbinding @@ -209,27 +214,23 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - err = 0; - if (unsafe_drop_pages(obj, shrink, trylock_vm)) { - /* May arrive from get_pages on another bo */ - if (!ww) { - if (!i915_gem_object_trylock(obj)) - goto skip; - } else { - err = i915_gem_object_lock(obj, ww); - if (err) - goto skip; - } - - if (!__i915_gem_object_put_pages(obj)) { - try_to_writeback(obj, shrink); - count += obj->base.size >> PAGE_SHIFT; - } - if (!ww) - i915_gem_object_unlock(obj); + /* May arrive from get_pages on another bo */ + if (!ww) { + if (!i915_gem_object_trylock(obj, NULL)) + goto skip; + } else { + err = i915_gem_object_lock(obj, ww); + if (err) + goto skip; } - dma_resv_prune(obj->base.resv); + if (drop_pages(obj, shrink, trylock_vm) && + !__i915_gem_object_put_pages(obj) && + !try_to_writeback(obj, shrink)) + count += obj->base.size >> PAGE_SHIFT; + + if (!ww) + i915_gem_object_unlock(obj); scanned += obj->base.size >> PAGE_SHIFT; skip: @@ -404,12 +405,18 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr list_for_each_entry_safe(vma, next, &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj = vma->obj; if (!vma->iomap || i915_vma_is_active(vma)) continue; + if (!i915_gem_object_trylock(obj, NULL)) + continue; + if (__i915_vma_unbind(vma) == 0) freed_pages += count; + + i915_gem_object_unlock(obj); } mutex_unlock(&i915->ggtt.vm.mutex); @@ -458,6 +465,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, #define obj_to_i915(obj__) to_i915((obj__)->base.dev) +/** + * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By + * default all object types that support shrinking(see IS_SHRINKABLE), will also + * make the object visible to the shrinker after allocating the system memory + * pages. + * @obj: The GEM object. + * + * This is typically used for special kernel internal objects that can't be + * easily processed by the shrinker, like if they are perma-pinned. + */ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); @@ -482,13 +499,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } -static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, - struct list_head *head) +static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, + struct list_head *head) { struct drm_i915_private *i915 = obj_to_i915(obj); unsigned long flags; - GEM_BUG_ON(!i915_gem_object_has_pages(obj)); if (!i915_gem_object_is_shrinkable(obj)) return; @@ -508,14 +524,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } +/** + * __i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. 
+ */ +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} + +/** + * __i915_gem_object_make_purgeable - Move the object to the tail of the + * purgeable list. Objects on this list might be swapped out. Used with + * DONTNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. + */ +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); +} + +/** + * i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * MUST only be called on objects which have backing pages. + * + * MUST be balanced with previous call to i915_gem_object_make_unshrinkable(). + */ void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.shrink_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_shrinkable(obj); } +/** + * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable + * list. Used with DONTNEED objects. Unlike with shrinkable objects, the + * shrinker will attempt to discard the backing pages, instead of trying to swap + * them out. + * @obj: The GEM object. + * + * MUST only be called on objects which have backing pages. + * + * MUST be balanced with previous call to i915_gem_object_make_unshrinkable(). + */ void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.purge_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_purgeable(obj); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index ddd37ccb1362..7df50fd6cc7b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -399,7 +399,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) return 0; } - if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) { + if (intel_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { drm_notice(&i915->drm, "%s, disabling use of stolen memory\n", "DMAR active"); @@ -488,6 +488,9 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) return 0; } + /* Exclude the reserved region from driver use */ + mem->region.end = reserved_base - 1; + /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; @@ -653,7 +656,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, cache_level = HAS_LLC(mem->i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (WARN_ON(!i915_gem_object_trylock(obj))) + if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) return -EBUSY; i915_gem_object_init_memory_region(obj, mem); @@ -720,9 +723,10 @@ static int init_stolen_smem(struct intel_memory_region *mem) return i915_gem_init_stolen(mem); } -static void release_stolen_smem(struct intel_memory_region *mem) +static int release_stolen_smem(struct intel_memory_region *mem) { i915_gem_cleanup_stolen(mem->i915); + return 0; } static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { @@ -759,10 +763,11 @@ err_fini: return err; } -static void release_stolen_lmem(struct intel_memory_region *mem) +static int release_stolen_lmem(struct intel_memory_region *mem) { io_mapping_fini(&mem->iomap); i915_gem_cleanup_stolen(mem->i915); + return 0; } static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = { @@ -778,6 +783,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct intel_uncore *uncore = &i915->uncore; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_memory_region *mem; + resource_size_t min_page_size; resource_size_t io_start; resource_size_t lmem_size; u64 lmem_base; @@ -789,8 +795,11 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, lmem_size = pci_resource_len(pdev, 2) - lmem_base; io_start = pci_resource_start(pdev, 2) + lmem_base; + min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : + I915_GTT_PAGE_SIZE_4K; + mem = intel_memory_region_create(i915, lmem_base, lmem_size, - I915_GTT_PAGE_SIZE_4K, io_start, + min_page_size, io_start, type, instance, &i915_region_stolen_lmem_ops); if (IS_ERR(mem)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 1929d6cf4150..75501db71041 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -38,12 +38,13 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, { const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = to_i915(dev); struct i915_gem_context *ctx; unsigned long idx; long ret; /* ABI: return -EIO if already wedged */ - ret = intel_gt_terminally_wedged(&to_i915(dev)->gt); + ret = intel_gt_terminally_wedged(to_gt(i915)); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 74a1ffd0d7dd..de3fe79b665a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -14,13 +14,9 @@ #include "gem/i915_gem_object.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" - -#include "gt/intel_engine_pm.h" -#include "gt/intel_gt.h" -#include "gt/intel_migrate.h" - #define I915_TTM_PRIO_PURGE 0 #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 @@ -34,7 +30,9 @@ * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. * @dev: The struct device used for dma mapping and unmapping. - * @cached_st: The cached scatter-gather table. + * @cached_rsgt: The cached scatter-gather table. + * @is_shmem: Set if using shmem. + * @filp: The shmem file, if using shmem backend. 
* * Note that DMA may be going on right up to the point where the page- * vector is unpopulated in delayed destroy. Hence keep the @@ -45,7 +43,10 @@ struct i915_ttm_tt { struct ttm_tt ttm; struct device *dev; - struct sg_table *cached_st; + struct i915_refct_sgt cached_rsgt; + + bool is_shmem; + struct file *filp; }; static const struct ttm_place sys_placement_flags = { @@ -103,37 +104,15 @@ static int i915_ttm_err_to_gem(int err) return err; } -static bool gpu_binds_iomem(struct ttm_resource *mem) -{ - return mem->mem_type != TTM_PL_SYSTEM; -} - -static bool cpu_maps_iomem(struct ttm_resource *mem) -{ - /* Once / if we support GGTT, this is also false for cached ttm_tts */ - return mem->mem_type != TTM_PL_SYSTEM; -} - -static enum i915_cache_level -i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, - struct ttm_tt *ttm) -{ - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && - ttm->caching == ttm_cached) ? I915_CACHE_LLC : - I915_CACHE_NONE; -} - -static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); - static enum ttm_caching i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) { /* - * Objects only allowed in system get cached cpu-mappings. - * Other objects get WC mapping for now. Even if in system. + * Objects only allowed in system get cached cpu-mappings, or when + * evicting lmem-only buffers to system for swapping. Other objects get + * WC mapping for now. Even if in system. */ - if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && - obj->mm.n_placements <= 1) + if (obj->mm.n_placements <= 1) return ttm_cached; return ttm_write_combined; @@ -179,15 +158,103 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, placement->busy_placement = busy; } +static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + const unsigned int max_segment = i915_sg_segment_size(); + const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; + struct file *filp = i915_tt->filp; + struct sgt_iter sgt_iter; + struct sg_table *st; + struct page *page; + unsigned long i; + int err; + + if (!filp) { + struct address_space *mapping; + gfp_t mask; + + filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; + + mapping = filp->f_mapping; + mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); + + i915_tt->filp = filp; + } + + st = &i915_tt->cached_rsgt.table; + err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping, + max_segment); + if (err) + return err; + + err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (err) + goto err_free_st; + + i = 0; + for_each_sgt_page(page, sgt_iter, st) + ttm->pages[i++] = page; + + if (ttm->page_flags & TTM_TT_FLAG_SWAPPED) + ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + + return 0; + +err_free_st: + shmem_sg_free_table(st, filp->f_mapping, false, false); + + return err; +} + +static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED; + struct sg_table *st = &i915_tt->cached_rsgt.table; + + 
shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping, + backup, backup); +} + +static void i915_ttm_tt_release(struct kref *ref) +{ + struct i915_ttm_tt *i915_tt = + container_of(ref, typeof(*i915_tt), cached_rsgt.kref); + struct sg_table *st = &i915_tt->cached_rsgt.table; + + GEM_WARN_ON(st->sgl); + + kfree(i915_tt); +} + +static const struct i915_refct_sgt_ops tt_rsgt_ops = { + .release = i915_ttm_tt_release +}; + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, bo->resource->mem_type); struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + enum ttm_caching caching; struct i915_ttm_tt *i915_tt; int ret; + if (!obj) + return NULL; + i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); if (!i915_tt) return NULL; @@ -196,38 +263,66 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, man->use_tt) page_flags |= TTM_TT_FLAG_ZERO_ALLOC; - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, - i915_ttm_select_tt_caching(obj)); - if (ret) { - kfree(i915_tt); - return NULL; + caching = i915_ttm_select_tt_caching(obj); + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + page_flags |= TTM_TT_FLAG_EXTERNAL | + TTM_TT_FLAG_EXTERNAL_MAPPABLE; + i915_tt->is_shmem = true; } + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching); + if (ret) + goto err_free; + + __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size, + &tt_rsgt_ops); + i915_tt->dev = obj->base.dev->dev; return &i915_tt->ttm; + +err_free: + kfree(i915_tt); + return NULL; +} + +static int i915_ttm_tt_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + if (i915_tt->is_shmem) + return i915_ttm_tt_shmem_populate(bdev, ttm, ctx); + + return ttm_pool_alloc(&bdev->pool, ttm, ctx); } static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + struct sg_table *st = &i915_tt->cached_rsgt.table; - if (i915_tt->cached_st) { - dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st, - DMA_BIDIRECTIONAL, 0); - sg_free_table(i915_tt->cached_st); - kfree(i915_tt->cached_st); - i915_tt->cached_st = NULL; + if (st->sgl) + dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); + + if (i915_tt->is_shmem) { + i915_ttm_tt_shmem_unpopulate(ttm); + } else { + sg_free_table(st); + ttm_pool_free(&bdev->pool, ttm); } - ttm_pool_free(&bdev->pool, ttm); } static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + if (i915_tt->filp) + fput(i915_tt->filp); + ttm_tt_fini(ttm); - kfree(i915_tt); + i915_refct_sgt_put(&i915_tt->cached_rsgt); } static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, @@ -235,6 +330,17 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + if (!obj) + return false; + + /* + * EXTERNAL objects should never be swapped out by TTM, instead we need + * to handle that ourselves. TTM will already skip such objects for us, + * but we would like to avoid grabbing locks for no good reason. + */ + if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) + return false; + /* Will do for now. 
Our pinned objects are still on TTM's LRU lists */ return i915_gem_object_evictable(obj); } @@ -245,28 +351,19 @@ static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, *placement = i915_sys_placement; } -static int i915_ttm_move_notify(struct ttm_buffer_object *bo) -{ - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - int ret; - - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); - if (ret) - return ret; - - ret = __i915_gem_object_put_pages(obj); - if (ret) - return ret; - - return 0; -} - -static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) +/** + * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information + * @obj: The GEM object + * This function frees any LMEM-related information that is cached on + * the object. For example the radix tree for fast page lookup and the + * cached refcounted sg-table + */ +void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj) { struct radix_tree_iter iter; void __rcu **slot; - if (!obj->ttm.cached_io_st) + if (!obj->ttm.cached_io_rsgt) return; rcu_read_lock(); @@ -274,93 +371,106 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index); rcu_read_unlock(); - sg_free_table(obj->ttm.cached_io_st); - kfree(obj->ttm.cached_io_st); - obj->ttm.cached_io_st = NULL; + i915_refct_sgt_put(obj->ttm.cached_io_rsgt); + obj->ttm.cached_io_rsgt = NULL; } -static void -i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +/** + * i915_ttm_purge - Clear an object of its memory + * @obj: The object + * + * This function is called to clear an object of it's memory when it is + * marked as not needed anymore. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_purge(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement place = {}; + int ret; - if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { - obj->write_domain = I915_GEM_DOMAIN_WC; - obj->read_domains = I915_GEM_DOMAIN_WC; - } else { - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - } -} + if (obj->mm.madv == __I915_MADV_PURGED) + return 0; -static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) -{ - struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - unsigned int cache_level; - unsigned int i; + ret = ttm_bo_validate(bo, &place, &ctx); + if (ret) + return ret; - /* - * If object was moved to an allowable region, update the object - * region to consider it migrated. Note that if it's currently not - * in an allowable region, it's evicted and we don't update the - * object region. - */ - if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { - for (i = 0; i < obj->mm.n_placements; ++i) { - struct intel_memory_region *mr = obj->mm.placements[i]; - - if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && - mr != obj->mm.region) { - i915_gem_object_release_memory_region(obj); - i915_gem_object_init_memory_region(obj, mr); - break; - } - } + if (bo->ttm && i915_tt->filp) { + /* + * The below fput(which eventually calls shmem_truncate) might + * be delayed by worker, so when directly called to purge the + * pages(like by the shrinker) we should try to be more + * aggressive and release the pages immediately. 
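 *
 * For reference, the direct purge mentioned here comes from the shrinker
 * hook added further down in this patch, roughly (sketch, not the full
 * function):
 *
 *	// i915_ttm_shrinker_release_pages(), shrinker context
 *	switch (obj->mm.madv) {
 *	case I915_MADV_DONTNEED:
 *		return i915_ttm_purge(obj);	// truncate the shmem file now
 *	case __I915_MADV_PURGED:
 *		return 0;
 *	}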
+ */ + shmem_truncate_range(file_inode(i915_tt->filp), + 0, (loff_t)-1); + fput(fetch_and_zero(&i915_tt->filp)); } - obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - - obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); + i915_ttm_free_cached_io_rsgt(obj); + obj->mm.madv = __I915_MADV_PURGED; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + return 0; } -static void i915_ttm_purge(struct drm_i915_gem_object *obj) +static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj, + bool no_wait_gpu, + bool should_writeback) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); struct ttm_operation_ctx ctx = { .interruptible = true, - .no_wait_gpu = false, + .no_wait_gpu = no_wait_gpu, }; struct ttm_placement place = {}; int ret; - if (obj->mm.madv == __I915_MADV_PURGED) - return; + if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM) + return 0; + + GEM_BUG_ON(!i915_tt->is_shmem); + + if (!i915_tt->filp) + return 0; + + ret = ttm_bo_wait_ctx(bo, &ctx); + if (ret) + return ret; + + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + return i915_ttm_purge(obj); + case __I915_MADV_PURGED: + return 0; + } + + if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) + return 0; - /* TTM's purge interface. Note that we might be reentering. */ + bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED; ret = ttm_bo_validate(bo, &place, &ctx); - if (!ret) { - obj->write_domain = 0; - obj->read_domains = 0; - i915_ttm_adjust_gem_after_move(obj); - i915_ttm_free_cached_io_st(obj); - obj->mm.madv = __I915_MADV_PURGED; + if (ret) { + bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + return ret; } -} -static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) -{ - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - int ret = i915_ttm_move_notify(bo); + if (should_writeback) + __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping); - GEM_WARN_ON(ret); - GEM_WARN_ON(obj->ttm.cached_io_st); - if (!ret && obj->mm.madv != I915_MADV_WILLNEED) - i915_ttm_purge(obj); + return 0; } static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) @@ -369,232 +479,115 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) if (likely(obj)) { __i915_gem_object_pages_fini(obj); - i915_ttm_free_cached_io_st(obj); + i915_ttm_free_cached_io_rsgt(obj); } } -static struct intel_memory_region * -i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) -{ - struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); - - /* There's some room for optimization here... 
*/ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); - if (ttm_mem_type == I915_PL_SYSTEM) - return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, - 0); - - return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, - ttm_mem_type - I915_PL_LMEM0); -} - -static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm) +static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); struct sg_table *st; int ret; - if (i915_tt->cached_st) - return i915_tt->cached_st; - - st = kzalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); + if (i915_tt->cached_rsgt.table.sgl) + return i915_refct_sgt_get(&i915_tt->cached_rsgt); + st = &i915_tt->cached_rsgt.table; ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, i915_sg_segment_size(), GFP_KERNEL); if (ret) { - kfree(st); + st->sgl = NULL; return ERR_PTR(ret); } ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); if (ret) { sg_free_table(st); - kfree(st); return ERR_PTR(ret); } - i915_tt->cached_st = st; - return st; + return i915_refct_sgt_get(&i915_tt->cached_rsgt); } -static struct sg_table * +/** + * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the + * resource memory + * @obj: The GEM object used for sg-table caching + * @res: The struct ttm_resource for which an sg-table is requested. + * + * This function returns a refcounted sg-table representing the memory + * pointed to by @res. If @res is the object's current resource it may also + * cache the sg_table on the object or attempt to access an already cached + * sg-table. The refcounted sg-table needs to be put when no-longer in use. + * + * Return: A valid pointer to a struct i915_refct_sgt or error pointer on + * failure. + */ +struct i915_refct_sgt * i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - if (!gpu_binds_iomem(res)) + if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. 
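 *
 * Callers treat the returned i915_refct_sgt with a plain get/put pattern,
 * for example (sketch of the convention used elsewhere in this file):
 *
 *	struct i915_refct_sgt *rsgt = i915_ttm_resource_get_st(obj, res);
 *
 *	if (IS_ERR(rsgt))
 *		return PTR_ERR(rsgt);
 *	// ... use rsgt->table ...
 *	i915_refct_sgt_put(rsgt);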
*/ - GEM_WARN_ON(!cpu_maps_iomem(res)); - return intel_region_ttm_resource_to_st(obj->mm.region, res); -} - -static int i915_ttm_accel_move(struct ttm_buffer_object *bo, - bool clear, - struct ttm_resource *dst_mem, - struct ttm_tt *dst_ttm, - struct sg_table *dst_st) -{ - struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), - bdev); - struct ttm_resource_manager *src_man = - ttm_manager_type(bo->bdev, bo->resource->mem_type); - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct sg_table *src_st; - struct i915_request *rq; - struct ttm_tt *src_ttm = bo->ttm; - enum i915_cache_level src_level, dst_level; - int ret; + GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res)); + if (bo->resource == res) { + if (!obj->ttm.cached_io_rsgt) { + struct i915_refct_sgt *rsgt; - if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt)) - return -EINVAL; + rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, + res); + if (IS_ERR(rsgt)) + return rsgt; - dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); - if (clear) { - if (bo->type == ttm_bo_type_kernel) - return -EINVAL; - - intel_engine_pm_get(i915->gt.migrate.context->engine); - ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, - dst_st->sgl, dst_level, - gpu_binds_iomem(dst_mem), - 0, &rq); - - if (!ret && rq) { - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); - } - intel_engine_pm_put(i915->gt.migrate.context->engine); - } else { - src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) : - obj->ttm.cached_io_st; - - src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); - intel_engine_pm_get(i915->gt.migrate.context->engine); - ret = intel_context_migrate_copy(i915->gt.migrate.context, - NULL, src_st->sgl, src_level, - gpu_binds_iomem(bo->resource), - dst_st->sgl, dst_level, - gpu_binds_iomem(dst_mem), - &rq); - if (!ret && rq) { - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + obj->ttm.cached_io_rsgt = rsgt; } - intel_engine_pm_put(i915->gt.migrate.context->engine); + return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return ret; + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); } -static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, - struct ttm_resource *dst_mem, - struct ttm_tt *dst_ttm, - struct sg_table *dst_st, - bool allow_accel) +static int i915_ttm_truncate(struct drm_i915_gem_object *obj) { - int ret = -EINVAL; + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + int err; - if (allow_accel) - ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st); - if (ret) { - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct intel_memory_region *dst_reg, *src_reg; - union { - struct ttm_kmap_iter_tt tt; - struct ttm_kmap_iter_iomap io; - } _dst_iter, _src_iter; - struct ttm_kmap_iter *dst_iter, *src_iter; - - dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); - src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); - GEM_BUG_ON(!dst_reg || !src_reg); - - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); - - src_iter = !cpu_maps_iomem(bo->resource) ? 
- ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); - } + WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + + err = i915_ttm_move_notify(bo); + if (err) + return err; + + return i915_ttm_purge(obj); } -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, - struct ttm_operation_ctx *ctx, - struct ttm_resource *dst_mem, - struct ttm_place *hop) +static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct ttm_resource_manager *dst_man = - ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct ttm_tt *ttm = bo->ttm; - struct sg_table *dst_st; - bool clear; int ret; - /* Sync for now. We could do the actual copy async. */ - ret = ttm_bo_wait_ctx(bo, ctx); - if (ret) - return ret; + if (!obj) + return; ret = i915_ttm_move_notify(bo); - if (ret) - return ret; - - if (obj->mm.madv != I915_MADV_WILLNEED) { + GEM_WARN_ON(ret); + GEM_WARN_ON(obj->ttm.cached_io_rsgt); + if (!ret && obj->mm.madv != I915_MADV_WILLNEED) i915_ttm_purge(obj); - ttm_resource_free(bo, &dst_mem); - return 0; - } - - /* Populate ttm with pages if needed. Typically system memory. */ - if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, ttm, ctx); - if (ret) - return ret; - } - - dst_st = i915_ttm_resource_get_st(obj, dst_mem); - if (IS_ERR(dst_st)) - return PTR_ERR(dst_st); - - clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) - __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true); - - ttm_bo_move_sync_cleanup(bo, dst_mem); - i915_ttm_adjust_domains_after_move(obj); - i915_ttm_free_cached_io_st(obj); - - if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { - obj->ttm.cached_io_st = dst_st; - obj->ttm.get_io_page.sg_pos = dst_st->sgl; - obj->ttm.get_io_page.sg_idx = 0; - } - - i915_ttm_adjust_gem_after_move(obj); - return 0; } static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { - if (!cpu_maps_iomem(mem)) + if (!i915_ttm_cpu_maps_iomem(mem)) return 0; mem->bus.caching = ttm_write_combined; @@ -607,19 +600,26 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start; struct scatterlist *sg; + unsigned long base; unsigned int ofs; + GEM_BUG_ON(!obj); GEM_WARN_ON(bo->ttm); + base = obj->mm.region->iomap.base - obj->mm.region->region.start; sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } +/* + * All callbacks need to take care not to downcast a struct ttm_buffer_object + * without checking its subclass, since it might be a TTM ghost object. 
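 *
 * Concretely, every callback that needs the GEM object starts with a check
 * such as (sketch of the convention, see eviction_valuable above):
 *
 *	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 *
 *	if (!obj)
 *		return false;	// ghost object: pick the callback's safe default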
+ */ static struct ttm_device_funcs i915_ttm_bo_driver = { .ttm_tt_create = i915_ttm_tt_create, + .ttm_tt_populate = i915_ttm_tt_populate, .ttm_tt_unpopulate = i915_ttm_tt_unpopulate, .ttm_tt_destroy = i915_ttm_tt_destroy, .eviction_valuable = i915_ttm_eviction_valuable, @@ -649,7 +649,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - struct sg_table *st; int real_num_busy; int ret; @@ -676,7 +675,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, return i915_ttm_err_to_gem(ret); } - i915_ttm_adjust_lru(obj); if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); if (ret) @@ -687,14 +685,19 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, } if (!i915_gem_object_has_pages(obj)) { - /* Object either has a page vector or is an iomem object */ - st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; - if (IS_ERR(st)) - return PTR_ERR(st); + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource); - __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt); + + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt->table.sgl)); } + i915_ttm_adjust_lru(obj); return ret; } @@ -766,12 +769,21 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, * and shrinkers will move it out if needed. */ - i915_ttm_adjust_lru(obj); + if (obj->mm.rsgt) + i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt)); } -static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) +/** + * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists. + * @obj: The object + */ +void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + bool shrinkable = + bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm); /* * Don't manipulate the TTM LRUs while in TTM bo destruction. @@ -781,10 +793,53 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) return; /* + * We skip managing the shrinker LRU in set_pages() and just manage + * everything here. This does at least solve the issue with having + * temporary shmem mappings(like with evicted lmem) not being visible to + * the shrinker. Only our shmem objects are shrinkable, everything else + * we keep as unshrinkable. + * + * To make sure everything plays nice we keep an extra shrink pin in TTM + * if the underlying pages are not currently shrinkable. Once we release + * our pin, like when the pages are moved to shmem, the pages will then + * be added to the shrinker LRU, assuming the caller isn't also holding + * a pin. + * + * TODO: consider maybe also bumping the shrinker list here when we have + * already unpinned it, which should give us something more like an LRU. + * + * TODO: There is a small window of opportunity for this function to + * get called from eviction after we've dropped the last GEM refcount, + * but before the TTM deleted flag is set on the object. Avoid + * adjusting the shrinker list in such cases, since the object is + * not available to the shrinker anyway due to its zero refcount. + * To fix this properly we should move to a TTM shrinker LRU list for + * these objects. 
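 *
 * The extra shrink pin mentioned above is taken at object creation time;
 * a sketch of the init-side counterpart (see __i915_gem_ttm_object_init
 * later in this patch):
 *
 *	// keep the object off the shrinker LRU until the ttm_tt is
 *	// actually backed by shmem pages
 *	i915_gem_object_make_unshrinkable(obj);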
+ */ + if (kref_get_unless_zero(&obj->base.refcount)) { + if (shrinkable != obj->mm.ttm_shrinkable) { + if (shrinkable) { + if (obj->mm.madv == I915_MADV_WILLNEED) + __i915_gem_object_make_shrinkable(obj); + else + __i915_gem_object_make_purgeable(obj); + } else { + i915_gem_object_make_unshrinkable(obj); + } + + obj->mm.ttm_shrinkable = shrinkable; + } + i915_gem_object_put(obj); + } + + /* * Put on the correct LRU list depending on the MADV status */ spin_lock(&bo->bdev->lru_lock); - if (obj->mm.madv != I915_MADV_WILLNEED) { + if (shrinkable) { + /* Try to keep shmem_tt from being considered for shrinking. */ + bo->priority = TTM_MAX_BO_PRIORITY - 1; + } else if (obj->mm.madv != I915_MADV_WILLNEED) { bo->priority = I915_TTM_PRIO_PURGE; } else if (!i915_gem_object_has_pages(obj)) { if (bo->priority < I915_TTM_PRIO_HAS_PAGES) @@ -823,15 +878,44 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) { struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = - i915_ttm_to_gem(area->vm_private_data); + struct ttm_buffer_object *bo = area->vm_private_data; + struct drm_device *dev = bo->base.dev; + struct drm_i915_gem_object *obj; + vm_fault_t ret; + int idx; + + obj = i915_ttm_to_gem(bo); + if (!obj) + return VM_FAULT_SIGBUS; /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE)) return VM_FAULT_SIGBUS; - return ttm_bo_vm_fault(vmf); + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + dma_resv_unlock(bo->base.resv); + return VM_FAULT_SIGBUS; + } + + if (drm_dev_enter(dev, &idx)) { + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + i915_ttm_adjust_lru(obj); + + dma_resv_unlock(bo->base.resv); + return ret; } static int @@ -880,16 +964,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) return drm_vma_node_offset_addr(&obj->base.vma_node); } +static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj) +{ + ttm_bo_unmap_virtual(i915_gem_to_ttm(obj)); +} + static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST, .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, - .truncate = i915_ttm_purge, + .truncate = i915_ttm_truncate, + .shrinker_release_pages = i915_ttm_shrinker_release_pages, + .adjust_lru = i915_ttm_adjust_lru, .delayed_free = i915_ttm_delayed_free, .migrate = i915_ttm_migrate, + .mmap_offset = i915_ttm_mmap_offset, + .unmap_virtual = i915_ttm_unmap_virtual, .mmap_ops = &vm_ops_ttm, }; @@ -901,6 +996,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) mutex_destroy(&obj->ttm.get_io_page.lock); if (obj->ttm.created) { + /* + * We freely manage the shrinker LRU outide of the mm.pages life + * cycle. As a result when destroying the object we should be + * extra paranoid and ensure we remove it from the LRU, before + * we free the object. + * + * Touching the ttm_shrinkable outside of the object lock here + * should be safe now that the last GEM object ref was dropped. 
+ */ + if (obj->mm.ttm_shrinkable) + i915_gem_object_make_unshrinkable(obj); + i915_ttm_backup_free(obj); /* This releases all gem object bindings to the backend. */ @@ -940,10 +1047,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); /* Don't put on a region list until we're either locked or fully initialized. */ - obj->mm.region = intel_memory_region_get(mem); + obj->mm.region = mem; INIT_LIST_HEAD(&obj->mm.region_link); - i915_gem_object_make_unshrinkable(obj); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : @@ -955,6 +1061,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, GEM_BUG_ON(page_size && obj->mm.n_placements); /* + * Keep an extra shrink pin to prevent the object from being made + * shrinkable too early. If the ttm_tt is ever allocated in shmem, we + * drop the pin. The TTM backend manages the shrinker LRU itself, + * outside of the normal mm.pages life cycle. + */ + i915_gem_object_make_unshrinkable(obj); + + /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the * destructor until obj->ttm.created is true. @@ -980,6 +1094,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, static const struct intel_memory_region_ops ttm_system_region_ops = { .init_object = __i915_gem_ttm_object_init, + .release = intel_region_ttm_fini, }; struct intel_memory_region * @@ -999,50 +1114,3 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915, intel_memory_region_set_name(mr, "system-ttm"); return mr; } - -/** - * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to - * another - * @dst: The destination object - * @src: The source object - * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. - * @intr: Whether to perform waits interruptible: - * - * Note: The caller is responsible for assuring that the underlying - * TTM objects are populated if needed and locked. - * - * Return: Zero on success. Negative error code on error. If @intr == true, - * then it may return -ERESTARTSYS or -EINTR. - */ -int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, - struct drm_i915_gem_object *src, - bool allow_accel, bool intr) -{ - struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); - struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); - struct ttm_operation_ctx ctx = { - .interruptible = intr, - }; - struct sg_table *dst_st; - int ret; - - assert_object_held(dst); - assert_object_held(src); - - /* - * Sync for now. This will change with async moves. - */ - ret = ttm_bo_wait_ctx(dst_bo, &ctx); - if (!ret) - ret = ttm_bo_wait_ctx(src_bo, &ctx); - if (ret) - return ret; - - dst_st = gpu_binds_iomem(dst_bo->resource) ? 
- dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm); - - __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm, - dst_st, allow_accel); - - return 0; -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 0b7291dd897c..9d698ad00853 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -5,6 +5,8 @@ #ifndef _I915_GEM_TTM_H_ #define _I915_GEM_TTM_H_ +#include <drm/ttm/ttm_placement.h> + #include "gem/i915_gem_object_types.h" /** @@ -35,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy)) + if (bo->destroy != i915_ttm_bo_destroy) return NULL; return container_of(bo, struct drm_i915_gem_object, __do_not_access); @@ -47,10 +49,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, resource_size_t page_size, unsigned int flags); -int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, - struct drm_i915_gem_object *src, - bool allow_accel, bool intr); - /* Internal I915 TTM declarations and definitions below. */ #define I915_PL_LMEM0 TTM_PL_PRIV @@ -60,4 +58,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, struct ttm_placement *i915_ttm_sys_placement(void); +void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj); + +struct i915_refct_sgt * +i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, + struct ttm_resource *res); + +void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); + +int i915_ttm_purge(struct drm_i915_gem_object *obj); + +/** + * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT? + * @mem: struct ttm_resource representing the memory. + * + * Return: true if memory should be viewed as LMEM for GTT binding purposes, + * false otherwise. + */ +static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem) +{ + return mem->mem_type != I915_PL_SYSTEM; +} + +/** + * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU? + * @mem: struct ttm_resource representing the memory. + * + * Return: true if memory should be viewed as IOMEM for CPU mapping purposes. + */ +static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != I915_PL_SYSTEM; +} #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c new file mode 100644 index 000000000000..ee9612a3ee5e --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/ttm/ttm_bo_driver.h> + +#include "i915_deps.h" +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_object.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" + +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_migrate.h" + +/** + * DOC: Selftest failure modes for failsafe migration: + * + * For fail_gpu_migration, the gpu blit scheduled is always a clear blit + * rather than a copy blit, and then we force the failure paths as if + * the blit fence returned an error. + * + * For fail_work_allocation we fail the kmalloc of the async worker, we + * sync the gpu blit. 
If it then fails, or fail_gpu_migration is set to + * true, then a memcpy operation is performed sync. + */ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +static bool fail_gpu_migration; +static bool fail_work_allocation; + +void i915_ttm_migrate_set_failure_modes(bool gpu_migration, + bool work_allocation) +{ + fail_gpu_migration = gpu_migration; + fail_work_allocation = work_allocation; +} +#endif + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + !i915_ttm_gtt_binds_lmem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + +static struct intel_memory_region * +i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + + /* There's some room for optimization here... */ + GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && + ttm_mem_type < I915_PL_LMEM0); + if (ttm_mem_type == I915_PL_SYSTEM) + return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, + 0); + + return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, + ttm_mem_type - I915_PL_LMEM0); +} + +/** + * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a + * TTM move + * @obj: The gem object + */ +void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +/** + * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move + * @obj: The gem object + * + * Adjusts the GEM object's region, mem_flags and cache coherency after a + * TTM move. + */ +void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + +/** + * i915_ttm_move_notify - Prepare an object for move + * @bo: The ttm buffer object. + * + * This function prepares an object for move by removing all GPU bindings, + * removing all CPU mapings and finally releasing the pages sg-table. + * + * Return: 0 if successful, negative error code on error. 
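 *
 * Typical usage, as in the truncate and swap_notify paths (sketch):
 *
 *	ret = i915_ttm_move_notify(bo);
 *	if (ret)
 *		return ret;
 *	// all GPU bindings, CPU maps and the pages sg-table are gone here,
 *	// so the backing store can now be moved or purged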
+ */ +int i915_ttm_move_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret; + + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (ret) + return ret; + + ret = __i915_gem_object_put_pages(obj); + if (ret) + return ret; + + return 0; +} + +static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, + struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, + struct sg_table *dst_st, + const struct i915_deps *deps) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct i915_request *rq; + struct ttm_tt *src_ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; + int ret; + + if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915))) + return ERR_PTR(-EINVAL); + + /* With fail_gpu_migration, we always perform a GPU clear. */ + if (I915_SELFTEST_ONLY(fail_gpu_migration)) + clear = true; + + dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); + if (clear) { + if (bo->type == ttm_bo_type_kernel && + !I915_SELFTEST_ONLY(fail_gpu_migration)) + return ERR_PTR(-EINVAL); + + intel_engine_pm_get(to_gt(i915)->migrate.context->engine); + ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps, + dst_st->sgl, dst_level, + i915_ttm_gtt_binds_lmem(dst_mem), + 0, &rq); + } else { + struct i915_refct_sgt *src_rsgt = + i915_ttm_resource_get_st(obj, bo->resource); + + if (IS_ERR(src_rsgt)) + return ERR_CAST(src_rsgt); + + src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); + intel_engine_pm_get(to_gt(i915)->migrate.context->engine); + ret = intel_context_migrate_copy(to_gt(i915)->migrate.context, + deps, src_rsgt->table.sgl, + src_level, + i915_ttm_gtt_binds_lmem(bo->resource), + dst_st->sgl, dst_level, + i915_ttm_gtt_binds_lmem(dst_mem), + &rq); + + i915_refct_sgt_put(src_rsgt); + } + + intel_engine_pm_put(to_gt(i915)->migrate.context->engine); + + if (ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + + return ret ? ERR_PTR(ret) : &rq->fence; +} + +/** + * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality. + * @_dst_iter: Storage space for the destination kmap iterator. + * @_src_iter: Storage space for the source kmap iterator. + * @dst_iter: Pointer to the destination kmap iterator. + * @src_iter: Pointer to the source kmap iterator. + * @clear: Whether to clear instead of copy. + * @src_rsgt: Refcounted scatter-gather list of source memory. + * @dst_rsgt: Refcounted scatter-gather list of destination memory. + */ +struct i915_ttm_memcpy_arg { + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, + _src_iter; + struct ttm_kmap_iter *dst_iter; + struct ttm_kmap_iter *src_iter; + unsigned long num_pages; + bool clear; + struct i915_refct_sgt *src_rsgt; + struct i915_refct_sgt *dst_rsgt; +}; + +/** + * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence. + * @fence: The dma-fence. + * @work: The work struct use for the memcpy work. + * @lock: The fence lock. Not used to protect anything else ATM. + * @irq_work: Low latency worker to signal the fence since it can't be done + * from the callback for lockdep reasons. + * @cb: Callback for the accelerated migration fence. + * @arg: The argument for the memcpy functionality. 
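 *
 * The worker is only armed when a GPU move was actually scheduled; a sketch
 * of the pattern used by __i915_ttm_move() below (illustrative only):
 *
 *	copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
 *	if (copy_work) {
 *		i915_ttm_memcpy_init(&copy_work->arg, bo, clear, dst_mem,
 *				     dst_ttm, dst_rsgt);
 *		fence = i915_ttm_memcpy_work_arm(copy_work, dep);
 *	}
 *	// if the blit fence later signals an error, __memcpy_cb() queues
 *	// __memcpy_work() to redo the move with memcpy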
+ */ +struct i915_ttm_memcpy_work { + struct dma_fence fence; + struct work_struct work; + /* The fence lock */ + spinlock_t lock; + struct irq_work irq_work; + struct dma_fence_cb cb; + struct i915_ttm_memcpy_arg arg; +}; + +static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg) +{ + ttm_move_memcpy(arg->clear, arg->num_pages, + arg->dst_iter, arg->src_iter); +} + +static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg, + struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct intel_memory_region *dst_reg, *src_reg; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) : + ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap, + &dst_rsgt->table, dst_reg->region.start); + + arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap, + &obj->ttm.cached_io_rsgt->table, + src_reg->region.start); + arg->clear = clear; + arg->num_pages = bo->base.size >> PAGE_SHIFT; + + arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt); + arg->src_rsgt = clear ? NULL : + i915_ttm_resource_get_st(obj, bo->resource); +} + +static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg) +{ + i915_refct_sgt_put(arg->src_rsgt); + i915_refct_sgt_put(arg->dst_rsgt); +} + +static void __memcpy_work(struct work_struct *work) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(work, typeof(*copy_work), work); + struct i915_ttm_memcpy_arg *arg = ©_work->arg; + bool cookie = dma_fence_begin_signalling(); + + i915_ttm_move_memcpy(arg); + dma_fence_end_signalling(cookie); + + dma_fence_signal(©_work->fence); + + i915_ttm_memcpy_release(arg); + dma_fence_put(©_work->fence); +} + +static void __memcpy_irq_work(struct irq_work *irq_work) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(irq_work, typeof(*copy_work), irq_work); + struct i915_ttm_memcpy_arg *arg = ©_work->arg; + + dma_fence_signal(©_work->fence); + i915_ttm_memcpy_release(arg); + dma_fence_put(©_work->fence); +} + +static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(cb, typeof(*copy_work), cb); + + if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) { + INIT_WORK(©_work->work, __memcpy_work); + queue_work(system_unbound_wq, ©_work->work); + } else { + init_irq_work(©_work->irq_work, __memcpy_irq_work); + irq_work_queue(©_work->irq_work); + } +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "i915_ttm_memcpy_work"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static const struct dma_fence_ops dma_fence_memcpy_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, +}; + +static struct dma_fence * +i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work, + struct dma_fence *dep) +{ + int ret; + + spin_lock_init(&work->lock); + dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0); + dma_fence_get(&work->fence); + ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb); + if (ret) { + if (ret != -ENOENT) + 
dma_fence_wait(dep, false); + + return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL : + dep->error); + } + + return &work->fence; +} + +static struct dma_fence * +__i915_ttm_move(struct ttm_buffer_object *bo, + const struct ttm_operation_ctx *ctx, bool clear, + struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt, bool allow_accel, + const struct i915_deps *move_deps) +{ + struct i915_ttm_memcpy_work *copy_work = NULL; + struct i915_ttm_memcpy_arg _arg, *arg = &_arg; + struct dma_fence *fence = ERR_PTR(-EINVAL); + + if (allow_accel) { + fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, + &dst_rsgt->table, move_deps); + + /* + * We only need to intercept the error when moving to lmem. + * When moving to system, TTM or shmem will provide us with + * cleared pages. + */ + if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) && + !I915_SELFTEST_ONLY(fail_gpu_migration || + fail_work_allocation)) + goto out; + } + + /* If we've scheduled gpu migration. Try to arm error intercept. */ + if (!IS_ERR(fence)) { + struct dma_fence *dep = fence; + + if (!I915_SELFTEST_ONLY(fail_work_allocation)) + copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL); + + if (copy_work) { + arg = ©_work->arg; + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + fence = i915_ttm_memcpy_work_arm(copy_work, dep); + } else { + dma_fence_wait(dep, false); + fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? + -EINVAL : fence->error); + } + dma_fence_put(dep); + + if (!IS_ERR(fence)) + goto out; + } else if (move_deps) { + int err = i915_deps_sync(move_deps, ctx); + + if (err) + return ERR_PTR(err); + } + + /* Error intercept failed or no accelerated migration to start with */ + if (!copy_work) + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + i915_ttm_move_memcpy(arg); + i915_ttm_memcpy_release(arg); + kfree(copy_work); + + return NULL; +out: + if (!fence && copy_work) { + i915_ttm_memcpy_release(arg); + kfree(copy_work); + } + + return fence; +} + +static int +prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + struct i915_deps *deps) +{ + int ret; + + ret = i915_deps_add_dependency(deps, bo->moving, ctx); + if (!ret) + ret = i915_deps_add_resv(deps, bo->base.resv, ctx); + + return ret; +} + +/** + * i915_ttm_move - The TTM move callback used by i915. + * @bo: The buffer object. + * @evict: Whether this is an eviction. + * @dst_mem: The destination ttm resource. + * @hop: If we need multihop, what temporary memory type to move to. + * + * Return: 0 if successful, negative error code otherwise. + */ +int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct dma_fence *migration_fence = NULL; + struct ttm_tt *ttm = bo->ttm; + struct i915_refct_sgt *dst_rsgt; + bool clear; + int ret; + + if (GEM_WARN_ON(!obj)) { + ttm_bo_move_null(bo, dst_mem); + return 0; + } + + ret = i915_ttm_move_notify(bo); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + i915_ttm_purge(obj); + ttm_resource_free(bo, &dst_mem); + return 0; + } + + /* Populate ttm with pages if needed. Typically system memory. 
*/ + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); + if (ret) + return ret; + } + + dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem); + if (IS_ERR(dst_rsgt)) + return PTR_ERR(dst_rsgt); + + clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + struct i915_deps deps; + + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + ret = prev_deps(bo, ctx, &deps); + if (ret) { + i915_refct_sgt_put(dst_rsgt); + return ret; + } + + migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm, + dst_rsgt, true, &deps); + i915_deps_fini(&deps); + } + + /* We can possibly get an -ERESTARTSYS here */ + if (IS_ERR(migration_fence)) { + i915_refct_sgt_put(dst_rsgt); + return PTR_ERR(migration_fence); + } + + if (migration_fence) { + ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, + true, dst_mem); + if (ret) { + dma_fence_wait(migration_fence, false); + ttm_bo_move_sync_cleanup(bo, dst_mem); + } + dma_fence_put(migration_fence); + } else { + ttm_bo_move_sync_cleanup(bo, dst_mem); + } + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_free_cached_io_rsgt(obj); + + if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) { + obj->ttm.cached_io_rsgt = dst_rsgt; + obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl; + obj->ttm.get_io_page.sg_idx = 0; + } else { + i915_refct_sgt_put(dst_rsgt); + } + + i915_ttm_adjust_lru(obj); + i915_ttm_adjust_gem_after_move(obj); + return 0; +} + +/** + * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to + * another + * @dst: The destination object + * @src: The source object + * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. + * @intr: Whether to perform waits interruptible: + * + * Note: The caller is responsible for assuring that the underlying + * TTM objects are populated if needed and locked. + * + * Return: Zero on success. Negative error code on error. If @intr == true, + * then it may return -ERESTARTSYS or -EINTR. 
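 *
 * The suspend/resume backup path is the main user; a sketch of that call
 * (see the i915_gem_ttm_pm.c hunks further down in this patch):
 *
 *	err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
 *	GEM_WARN_ON(err);
 *	ttm_bo_wait_ctx(backup_bo, &ctx);	// sync up, the copy is now async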
+ */ +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr) +{ + struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); + struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); + struct ttm_operation_ctx ctx = { + .interruptible = intr, + }; + struct i915_refct_sgt *dst_rsgt; + struct dma_fence *copy_fence; + struct i915_deps deps; + int ret; + + assert_object_held(dst); + assert_object_held(src); + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + ret = dma_resv_reserve_shared(src_bo->base.resv, 1); + if (ret) + return ret; + + ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx); + if (ret) + return ret; + + ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx); + if (ret) + return ret; + + dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource); + copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource, + dst_bo->ttm, dst_rsgt, allow_accel, + &deps); + + i915_deps_fini(&deps); + i915_refct_sgt_put(dst_rsgt); + if (IS_ERR_OR_NULL(copy_fence)) + return PTR_ERR_OR_ZERO(copy_fence); + + dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence); + dma_resv_add_shared_fence(src_bo->base.resv, copy_fence); + + dma_fence_put(copy_fence); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h new file mode 100644 index 000000000000..d2e7f149e05c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_GEM_TTM_MOVE_H_ +#define _I915_GEM_TTM_MOVE_H_ + +#include <linux/types.h> + +#include "i915_selftest.h" + +struct ttm_buffer_object; +struct ttm_operation_ctx; +struct ttm_place; +struct ttm_resource; +struct ttm_tt; + +struct drm_i915_gem_object; +struct i915_refct_sgt; + +int i915_ttm_move_notify(struct ttm_buffer_object *bo); + +I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration, + bool work_allocation)); + +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr); + +/* Internal I915 TTM declarations and definitions below. 
*/ + +int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop); + +void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj); + +void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 3b6d14b5c604..9aad84059d56 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -12,6 +12,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" /** @@ -79,6 +80,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); GEM_WARN_ON(err); + ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = backup; return 0; @@ -169,6 +171,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu, false); GEM_WARN_ON(err); + ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = NULL; err = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2cb51e3dbb62..6d1a71d6404c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -530,7 +530,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. */ - if (!dev_priv->gt.vm->has_read_only) + if (!to_gt(dev_priv)->vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f11325484110..dab3d30c09a0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -10,7 +10,6 @@ #include "gt/intel_engine.h" -#include "dma_resv_utils.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -52,13 +51,6 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, } dma_resv_iter_end(&cursor); - /* - * Opportunistically prune the fences iff we know they have *all* been - * signaled. - */ - if (timeout > 0) - dma_resv_prune(resv); - return ret; } @@ -262,6 +254,6 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, unsigned int flags) { might_sleep(); - /* NOP for now. 
*/ - return 0; + + return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE)); } diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index dbdbdc344d87..7271fbf813fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -12,6 +12,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) { + char huge_opt[] = "huge=within_size"; /* r/w */ struct file_system_type *type; struct vfsmount *gemfs; char *opts; @@ -31,10 +32,8 @@ int i915_gemfs_init(struct drm_i915_private *i915) */ opts = NULL; - if (intel_vtd_active()) { + if (intel_vtd_active(i915)) { if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - static char huge_opt[] = "huge=within_size"; /* r/w */ - opts = huge_opt; drm_info(&i915->drm, "Transparent Hugepage mode '%s'\n", diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b2003133deaf..11f0aa65f8a3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -22,6 +22,22 @@ #include "selftests/mock_region.h" #include "selftests/i915_random.h" +static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915, + struct file *file) +{ + struct i915_gem_context *ctx = live_context(i915, file); + struct i915_address_space *vm; + + if (IS_ERR(ctx)) + return ctx; + + vm = ctx->vm; + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + + return ctx; +} + static const unsigned int page_sizes[] = { I915_GTT_PAGE_SIZE_2M, I915_GTT_PAGE_SIZE_64K, @@ -552,7 +568,7 @@ out_unpin: out_put: i915_gem_object_put(obj); out_region: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg) __i915_gem_object_put_pages(obj); i915_gem_object_unlock(obj); i915_gem_object_put(obj); + + i915_gem_drain_freed_objects(i915); } } @@ -1080,10 +1098,6 @@ static int __igt_write_huge(struct intel_context *ce, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_unbind(vma); - if (err) - return err; - err = i915_vma_pin(vma, size, 0, flags | offset); if (err) { /* @@ -1117,7 +1131,7 @@ out_vma_unpin: return err; } -static int igt_write_huge(struct i915_gem_context *ctx, +static int igt_write_huge(struct drm_i915_private *i915, struct drm_i915_gem_object *obj) { struct i915_gem_engines *engines; @@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, IGT_TIMEOUT(end_time); unsigned int max_page_size; unsigned int count; + struct i915_gem_context *ctx; + struct file *file; u64 max; u64 num; u64 size; @@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx, int i, n; int err = 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); size = obj->base.size; @@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx, } i915_gem_context_unlock_engines(ctx); if (!n) - return 0; + goto out; /* * To keep things interesting when alternating between engines in our @@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, kfree(order); +out: + fput(file); return err; } @@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng, static int igt_ppgtt_smoke_huge(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct 
drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; I915_RND_STATE(prng); struct { @@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg) u32 min = backends[i].min; u32 max = backends[i].max; u32 size = max; + try_again: size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); @@ -1336,7 +1364,7 @@ try_again: goto out_unpin; } - err = igt_write_huge(ctx, obj); + err = igt_write_huge(i915, obj); if (err) { pr_err("%s write-huge failed with size=%u, i=%d\n", __func__, size, i); @@ -1363,8 +1391,7 @@ out_put: static int igt_ppgtt_sanity_check(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct { igt_create_fn fn; @@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg) if (pages) obj->mm.page_sizes.sg = pages; - err = igt_write_huge(ctx, obj); + err = igt_write_huge(i915, obj); i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); @@ -1458,15 +1485,27 @@ out: static int igt_tmpfs_fallback(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm; + struct i915_gem_context *ctx; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; + struct file *file; u32 *vaddr; int err = 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + /* * Make sure that we don't burst into a ball of flames upon falling back * to tmpfs, which we rely on if on the off-chance we encouter a failure @@ -1510,33 +1549,47 @@ out_restore: i915->mm.gemfs = gemfs; i915_vm_put(vm); +out: + fput(file); return err; } static int igt_shrink_thp(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm; + struct i915_gem_context *ctx; struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; struct i915_vma *vma; + struct file *file; unsigned int flags = PIN_USER; unsigned int n; bool should_swap; - int err = 0; + int err; + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); /* * Sanity check shrinking huge-paged object -- make sure nothing blows * up. 
*/ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - goto out_vm; - } - obj = i915_gem_object_create_shmem(i915, SZ_2M); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -1626,7 +1679,8 @@ out_put: i915_gem_object_put(obj); out_vm: i915_vm_put(vm); - +out: + fput(file); return err; } @@ -1651,7 +1705,7 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; @@ -1687,36 +1741,14 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), }; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct file *file; - int err; if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - vm = ctx->vm; - if (vm) - WRITE_ONCE(vm->scrub_64K, true); - - err = i915_subtests(tests, ctx); - -out_file: - fput(file); - return err; + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 8402ed925a69..75947e9dada2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -592,7 +592,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_client_tiled_blits), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index b32f7fed2d9c..3f41fe5ec9d4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -88,9 +88,9 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ) < 0) { + if (i915_request_wait(rq, 0, 10 * HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(to_gt(i915)); i915_request_put(rq); err = -EIO; goto out_file; @@ -146,7 +146,7 @@ static int live_nop_switch(void *arg) if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(to_gt(i915)); i915_request_put(rq); break; } @@ -1223,7 +1223,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, return 0; if (flags & TEST_RESET) - igt_global_reset_lock(&i915->gt); + igt_global_reset_lock(to_gt(i915)); obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -1306,7 +1306,7 @@ out_put: out_unlock: if (flags & TEST_RESET) - igt_global_reset_unlock(&i915->gt); + igt_global_reset_unlock(to_gt(i915)); if (ret) pr_err("%s: Failed with %d!\n", name, ret); @@ -1481,10 +1481,10 @@ static int check_scratch(struct i915_address_space *vm, u64 offset) static int write_to_scratch(struct i915_gem_context *ctx, struct intel_engine_cs *engine, + struct 
drm_i915_gem_object *obj, u64 offset, u32 value) { struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; @@ -1497,15 +1497,9 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) return err; - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out; - } + if (IS_ERR(cmd)) + return PTR_ERR(cmd); *cmd++ = MI_STORE_DWORD_IMM_GEN4; if (GRAPHICS_VER(i915) >= 8) { @@ -1569,17 +1563,19 @@ err_unpin: i915_vma_unpin(vma); out_vm: i915_vm_put(vm); -out: - i915_gem_object_put(obj); + + if (!err) + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + return err; } static int read_from_scratch(struct i915_gem_context *ctx, struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, u64 offset, u32 *value) { struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; struct i915_address_space *vm; const u32 result = 0x100; struct i915_request *rq; @@ -1594,10 +1590,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) return err; - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600; @@ -1615,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out; + goto err_unpin; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1651,7 +1643,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out; + goto err_unpin; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1722,8 +1714,10 @@ err_unpin: i915_vma_unpin(vma); out_vm: i915_vm_put(vm); -out: - i915_gem_object_put(obj); + + if (!err) + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + return err; } @@ -1757,6 +1751,7 @@ static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; + struct drm_i915_gem_object *obj_a, *obj_b; unsigned long num_engines, count; struct intel_engine_cs *engine; struct igt_live_test t; @@ -1810,6 +1805,18 @@ static int igt_vm_isolation(void *arg) vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); + obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj_a)) { + err = PTR_ERR(obj_a); + goto out_file; + } + + obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj_b)) { + err = PTR_ERR(obj_b); + goto put_a; + } + count = 0; num_engines = 0; for_each_uabi_engine(engine, i915) { @@ -1832,13 +1839,13 @@ static int igt_vm_isolation(void *arg) I915_GTT_PAGE_SIZE, vm_total, sizeof(u32), alignof_dword); - err = write_to_scratch(ctx_a, engine, + err = write_to_scratch(ctx_a, engine, obj_a, offset, 0xdeadbeef); if (err == 0) - err = read_from_scratch(ctx_b, engine, + err = read_from_scratch(ctx_b, engine, obj_b, offset, &value); if (err) - goto out_file; + goto put_b; if (value != expected) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1847,7 +1854,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_file; + goto put_b; } this++; @@ -1858,6 +1865,10 @@ static int 
igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %lu engines\n", count, num_engines); +put_b: + i915_gem_object_put(obj_b); +put_a: + i915_gem_object_put(obj_a); out_file: if (igt_live_test_end(&t)) err = -EIO; @@ -1877,7 +1888,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_vm_isolation), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 4a6bb64c3a35..3cc74b0fed06 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", - PTR_ERR(dmabuf)); + PTR_ERR(obj)); err = PTR_ERR(obj); goto out_ret; } @@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, regions, num_regions); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", - PTR_ERR(dmabuf)); + PTR_ERR(obj)); err = PTR_ERR(obj); goto out_ret; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 28a700f08b49..ecb691c81d1e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,6 +4,7 @@ */ #include "gt/intel_migrate.h" +#include "gem/i915_gem_ttm_move.h" static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) @@ -227,17 +228,38 @@ out_put: return err; } +static int igt_lmem_pages_failsafe_migrate(void *arg) +{ + int fail_gpu, fail_alloc, ret; + + for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { + for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) { + pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", + fail_gpu, fail_alloc); + i915_ttm_migrate_set_failure_modes(fail_gpu, + fail_alloc); + ret = igt_lmem_pages_migrate(arg); + if (ret) + goto out_err; + } + } + +out_err: + i915_ttm_migrate_set_failure_modes(false, false); + return ret; +} + int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_smem_create_migrate), SUBTEST(igt_lmem_create_migrate), SUBTEST(igt_same_create_migrate), - SUBTEST(igt_lmem_pages_migrate), + SUBTEST(igt_lmem_pages_failsafe_migrate), }; if (!HAS_LMEM(i915)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 6d30cdfa80f3..c6291429b00c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -84,6 +84,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, struct rnd_state *prng) { const unsigned long npages = obj->base.size / PAGE_SIZE; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; @@ -141,7 +142,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) goto out; - intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); p = 
i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -175,6 +176,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, { const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; unsigned long page; int err; @@ -234,7 +236,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -616,14 +618,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - cancel_delayed_work_sync(&i915->gt.requests.retire_work); + intel_gt_pm_get(to_gt(i915)); + cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { igt_flush_test(i915); - intel_gt_pm_put(&i915->gt); + intel_gt_pm_put(to_gt(i915)); i915_gem_driver_register__shrinker(i915); } @@ -651,8 +653,8 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Disable background reaper */ disable_retire_worker(i915); - GEM_BUG_ON(!i915->gt.awake); - intel_gt_retire_requests(&i915->gt); + GEM_BUG_ON(!to_gt(i915)->awake); + intel_gt_retire_requests(to_gt(i915)); i915_gem_drain_freed_objects(i915); /* Trim the device mmap space to only a page */ @@ -728,7 +730,7 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); @@ -942,7 +944,7 @@ static int __igt_mmap(struct drm_i915_private *i915, } if (type == I915_MMAP_TYPE_GTT) - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); err = wc_check(obj); if (err == -ENXIO) @@ -1049,7 +1051,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915, goto out_unmap; } - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); err = access_process_vm(current, addr, &x, sizeof(x), 0); if (err != sizeof(x)) { @@ -1065,7 +1067,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915, goto out_unmap; } - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); err = __get_user(y, ptr); if (err) { @@ -1165,7 +1167,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, } if (type == I915_MMAP_TYPE_GTT) - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); for_each_uabi_engine(engine, i915) { struct i915_request *rq; @@ -1366,20 +1368,10 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, } } - if (!obj->ops->mmap_ops) { - err = check_absent(addr, obj->base.size); - if (err) { - pr_err("%s: was not absent\n", obj->mm.region->name); - goto out_unmap; - } - } else { - /* ttm allows access to evicted regions by design */ - - err = check_present(addr, obj->base.size); - if (err) { - pr_err("%s: was not present\n", obj->mm.region->name); - goto out_unmap; - } + err = check_absent(addr, obj->base.size); + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); + goto out_unmap; } out_unmap: |
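The core policy added by the i915_gem_ttm_move.c hunks above is: prefer the accelerated (blitter) copy, but never let a failed GPU move lose data when migrating into local memory; either arm an error-intercept memcpy work on the accel fence, or, if that cannot be set up, fall back to a CPU memcpy inline. Below is a minimal, self-contained userspace model of that decision flow. It is not the i915 API: struct buf, accel_copy(), cpu_copy() and move_buffer() are illustrative stand-ins for the real fence-based machinery in __i915_ttm_move().

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct buf { char data[64]; };

/* Stand-in for the accelerated (GPU) copy; may fail, e.g. when the GT is wedged. */
static int accel_copy(struct buf *dst, const struct buf *src, bool simulate_failure)
{
	if (simulate_failure)
		return -1;
	memcpy(dst->data, src->data, sizeof(dst->data));
	return 0;
}

/* Stand-in for the always-available CPU memcpy path (i915_ttm_move_memcpy()). */
static void cpu_copy(struct buf *dst, const struct buf *src)
{
	memcpy(dst->data, src->data, sizeof(dst->data));
}

/*
 * Mirror of the fallback policy: try the accelerated path first, and only
 * when it was skipped or reported an error do the CPU copy, so the move
 * always completes with valid destination contents.
 */
static void move_buffer(struct buf *dst, const struct buf *src,
			bool allow_accel, bool simulate_failure)
{
	if (allow_accel && accel_copy(dst, src, simulate_failure) == 0)
		return;
	cpu_copy(dst, src);
}

int main(void)
{
	struct buf src = { "payload" }, dst = { "" };

	move_buffer(&dst, &src, true, true); /* accel "fails", memcpy rescues the move */
	printf("dst = %s\n", dst.data);
	return 0;
}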
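The migrate selftest hunk exercises that fallback by wrapping the existing test in igt_lmem_pages_failsafe_migrate(), which runs it under every combination of simulated GPU-copy failure and work-allocation failure and always clears the injection afterwards. A small self-contained sketch of that wrapper pattern follows; set_failure_modes() and run_migrate_test() are hypothetical stand-ins, not the i915 selftest API.

#include <stdbool.h>
#include <stdio.h>

static bool sim_fail_gpu, sim_fail_alloc;

/* Stand-in for i915_ttm_migrate_set_failure_modes(). */
static void set_failure_modes(bool fail_gpu, bool fail_alloc)
{
	sim_fail_gpu = fail_gpu;
	sim_fail_alloc = fail_alloc;
}

/* Pretend test body: would migrate pages and verify their contents. */
static int run_migrate_test(void)
{
	printf("running with gpu_fail=%d alloc_fail=%d\n",
	       sim_fail_gpu, sim_fail_alloc);
	return 0;
}

static int failsafe_migrate(void)
{
	int ret = 0;

	for (int fail_gpu = 0; fail_gpu < 2; fail_gpu++) {
		for (int fail_alloc = 0; fail_alloc < 2; fail_alloc++) {
			set_failure_modes(fail_gpu, fail_alloc);
			ret = run_migrate_test();
			if (ret)
				goto out;
		}
	}
out:
	set_failure_modes(false, false);	/* never leave injection armed */
	return ret;
}

int main(void)
{
	return failsafe_migrate();
}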
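Several of the huge-page and context selftest conversions above (igt_write_huge, igt_tmpfs_fallback, igt_shrink_thp, igt_vm_isolation) now take the bare i915 device and acquire their own file and context, releasing them through the usual goto-unwind labels on every exit path. The sketch below models that acquire/unwind shape in plain C, with open() and malloc() standing in for mock_file() and the new hugepage_ctx() helper; it is an illustration of the control flow, not the kernel code.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int run_test(int fd, void *ctx)
{
	(void)fd;
	(void)ctx;
	return 0;	/* the actual test body would go here */
}

static int selftest(void)
{
	void *ctx;
	int fd, err;

	fd = open("/dev/null", O_RDONLY);	/* stand-in for mock_file() */
	if (fd < 0)
		return -1;

	ctx = malloc(64);			/* stand-in for hugepage_ctx() */
	if (!ctx) {
		err = -1;
		goto out_file;		/* release only what was already taken */
	}

	err = run_test(fd, ctx);

	free(ctx);
out_file:
	close(fd);
	return err;
}

int main(void)
{
	return selftest() ? EXIT_FAILURE : EXIT_SUCCESS;
}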