diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_vma.c')
| -rw-r--r-- | drivers/gpu/drm/i915/i915_vma.c | 171 |
1 files changed, 136 insertions, 35 deletions
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f51fd9fd4c89..2c0a63664e13 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -24,16 +24,21 @@ #include <linux/sched/mm.h> #include <linux/dma-fence-array.h> + #include <drm/drm_gem.h> +#include <drm/drm_print.h> -#include "display/intel_display.h" +#include "display/intel_fb.h" #include "display/intel_frontbuffer.h" #include "gem/i915_gem_lmem.h" +#include "gem/i915_gem_object_frontbuffer.h" #include "gem/i915_gem_tiling.h" #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" +#include "gt/intel_tlb.h" #include "i915_drv.h" #include "i915_gem_evict.h" @@ -74,14 +79,14 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) char buf[512]; if (!vma->node.stack) { - drm_dbg(&to_i915(vma->obj->base.dev)->drm, + drm_dbg(vma->obj->base.dev, "vma.node [%08llx + %08llx] %s: unknown owner\n", vma->node.start, vma->node.size, reason); return; } stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0); - drm_dbg(&to_i915(vma->obj->base.dev)->drm, + drm_dbg(vma->obj->base.dev, "vma.node [%08llx + %08llx] %s: inserted at %s\n", vma->node.start, vma->node.size, reason, buf); } @@ -101,12 +106,42 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref) static int __i915_vma_active(struct i915_active *ref) { - return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_tryget(vma)) + return -ENOENT; + + /* + * Exclude global GTT VMA from holding a GT wakeref + * while active, otherwise GPU never goes idle. + */ + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we and our _retire() counterpart can be + * called asynchronously, storing a wakeref tracking + * handle inside struct i915_vma is not safe, and + * there is no other good place for that. Hence, + * use untracked variants of intel_gt_pm_get/put(). + */ + intel_gt_pm_get_untracked(vma->vm->gt); + } + + return 0; } static void __i915_vma_retire(struct i915_active *ref) { - i915_vma_put(active_to_vma(ref)); + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we can be called from atomic contexts, + * use an async variant of intel_gt_pm_put(). + */ + intel_gt_pm_put_async_untracked(vma->vm->gt); + } + + i915_vma_put(vma); } static struct i915_vma * @@ -315,7 +350,7 @@ struct i915_vma_work { struct i915_vma_resource *vma_res; struct drm_i915_gem_object *obj; struct i915_sw_dma_fence_cb cb; - enum i915_cache_level cache_level; + unsigned int pat_index; unsigned int flags; }; @@ -334,7 +369,7 @@ static void __vma_bind(struct dma_fence_work *work) return; vma_res->ops->bind_vma(vma_res->vm, &vw->stash, - vma_res, vw->cache_level, vw->flags); + vma_res, vw->pat_index, vw->flags); } static void __vma_release(struct dma_fence_work *work) @@ -426,7 +461,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map - * @cache_level: mapping cache level + * @pat_index: PAT index to set in PTE * @flags: flags like global or local mapping * @work: preallocated worker for allocating and binding the PTE * @vma_res: pointer to a preallocated vma resource. The resource is either @@ -437,7 +472,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, * Note that DMA addresses are also the only part of the SG table we care about. */ int i915_vma_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags, struct i915_vma_work *work, struct i915_vma_resource *vma_res) @@ -507,7 +542,7 @@ int i915_vma_bind(struct i915_vma *vma, struct dma_fence *prev; work->vma_res = i915_vma_resource_get(vma->resource); - work->cache_level = cache_level; + work->pat_index = pat_index; work->flags = bind_flags; /* @@ -537,7 +572,7 @@ int i915_vma_bind(struct i915_vma *vma, return ret; } - vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level, + vma->ops->bind_vma(vma->vm, NULL, vma->resource, pat_index, bind_flags); } @@ -739,13 +774,14 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) /** * i915_vma_insert - finds a slot for the vma in its address space * @vma: the vma + * @ww: An optional struct i915_gem_ww_ctx * @size: requested size in bytes (can be larger than the VMA) * @alignment: required alignment * @flags: mask of PIN_* flags to use * * First we try to allocate some free space that meets the requirements for - * the VMA. Failiing that, if the flags permit, it will evict an old VMA, - * preferrably the oldest idle entry to make room for the new VMA. + * the VMA. Failing that, if the flags permit, it will evict an old VMA, + * preferably the oldest idle entry to make room for the new VMA. * * Returns: * 0 on success, negative error code otherwise. @@ -804,7 +840,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * attempt to find space. */ if (size > end - 2 * guard) { - drm_dbg(&to_i915(vma->obj->base.dev)->drm, + drm_dbg(vma->obj->base.dev, "Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n", size, flags & PIN_MAPPABLE ? "mappable" : "total", end); return -ENOSPC; @@ -813,7 +849,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, color = 0; if (i915_vm_has_cache_coloring(vma->vm)) - color = vma->obj->cache_level; + color = vma->obj->pat_index; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; @@ -843,7 +879,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * objects which need to be tightly packed into the low 32bits. * * Note that we assume that GGTT are limited to 4GiB for the - * forseeable future. See also i915_ggtt_offset(). + * foreseeable future. See also i915_ggtt_offset(). */ if (upper_32_bits(end - 1) && vma->page_sizes.sg > I915_GTT_PAGE_SIZE && @@ -967,7 +1003,7 @@ rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, /* * The DE ignores the PTEs for the padding tiles, the sg entry - * here is just a conenience to indicate how many padding PTEs + * here is just a convenience to indicate how many padding PTEs * to insert at this spot. */ sg_set_page(sg, NULL, left, 0); @@ -1338,6 +1374,12 @@ err_unpin: void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) { + struct intel_gt *gt; + int id; + + if (!tlb) + return; + /* * Before we release the pages that were bound by this vma, we * must invalidate all the TLBs that may still have a reference @@ -1346,7 +1388,9 @@ void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) * the most recent TLB invalidation seqno, and if we have not yet * flushed the TLBs upon release, perform a full invalidation. */ - WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt)); + for_each_gt(gt, vm->i915, id) + WRITE_ONCE(tlb[id], + intel_gt_next_invalidate_tlb_full(gt)); } static void __vma_put_pages(struct i915_vma *vma, unsigned int count) @@ -1393,7 +1437,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; struct i915_vma_resource *vma_res = NULL; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref; unsigned int bound; int err; @@ -1413,8 +1457,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* + * In case of a global GTT, we must hold a runtime-pm wakeref + * while global PTEs are updated. In other cases, we hold + * the rpm reference while the VMA is active. Since runtime + * resume may require allocations, which are forbidden inside + * vm->mutex, get the first rpm wakeref outside of the mutex. + */ + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { /* lock VM */ @@ -1517,7 +1567,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, - vma->obj->cache_level, + vma->obj->pat_index, flags, work, vma_res); vma_res = NULL; if (err) @@ -1547,11 +1597,22 @@ err_unlock: err_vma_res: i915_vma_resource_free(vma_res); err_fence: - if (work) - dma_fence_work_commit_imm(&work->base); + if (work) { + /* + * When pinning VMA to GGTT on CHV or BXT with VTD enabled, + * commit VMA binding asynchronously to avoid risk of lock + * inversion among reservation_ww locks held here and + * cpu_hotplug_lock acquired from stop_machine(), which we + * wrap around GGTT updates when running in those environments. + */ + if (i915_vma_is_ggtt(vma) && + intel_vm_no_concurrent_access_wa(vma->vm->i915)) + dma_fence_work_commit(&work->base); + else + dma_fence_work_commit_imm(&work->base); + } err_rpm: - if (wakeref) - intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); if (moving) dma_fence_put(moving); @@ -1560,6 +1621,26 @@ err_rpm: return err; } +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, size, alignment, flags); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + return err; +} + static void flush_idle_contexts(struct intel_gt *gt) { struct intel_engine_cs *engine; @@ -1628,6 +1709,26 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, return err; } +/** + * i915_ggtt_clear_scanout - Clear scanout flag for all objects ggtt vmas + * @obj: i915 GEM object + * This function clears scanout flags for objects ggtt vmas. These flags are set + * when object is pinned for display use and this function to clear them all is + * targeted to be called by frontbuffer tracking code when the frontbuffer is + * about to be released. + */ +void i915_ggtt_clear_scanout(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) { + i915_vma_clear_scanout(vma); + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; + } + spin_unlock(&obj->vma.lock); +} + static void __vma_close(struct i915_vma *vma, struct intel_gt *gt) { /* @@ -1714,7 +1815,7 @@ static void release_references(struct i915_vma *vma, struct intel_gt *gt, i915_vma_free(vma); } -/** +/* * i915_vma_destroy_locked - Remove all weak reference to the vma and put * the initial reference. * @@ -1903,13 +2004,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (flags & EXEC_OBJECT_WRITE) { - struct intel_frontbuffer *front; + struct i915_frontbuffer *front; - front = __intel_frontbuffer_get(obj); + front = i915_gem_object_frontbuffer_lookup(obj); if (unlikely(front)) { - if (intel_frontbuffer_invalidate(front, ORIGIN_CS)) + if (intel_frontbuffer_invalidate(&front->base, ORIGIN_CS)) i915_active_add_request(&front->write, rq); - intel_frontbuffer_put(front); + i915_gem_object_frontbuffer_put(front); } } @@ -1991,7 +2092,7 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) if (async) unbind_fence = i915_vma_resource_unbind(vma_res, - &vma->obj->mm.tlb); + vma->obj->mm.tlb); else unbind_fence = i915_vma_resource_unbind(vma_res, NULL); @@ -2008,7 +2109,7 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) dma_fence_put(unbind_fence); unbind_fence = NULL; } - vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb); + vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb); } /* @@ -2090,7 +2191,7 @@ static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma) int i915_vma_unbind(struct i915_vma *vma) { struct i915_address_space *vm = vma->vm; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref = NULL; int err; assert_object_held_shared(vma->obj); @@ -2129,7 +2230,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) { struct drm_i915_gem_object *obj = vma->obj; struct i915_address_space *vm = vma->vm; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref = NULL; struct dma_fence *fence; int err; |
