Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 338 ++++++++++++++++---------
 1 file changed, 199 insertions(+), 139 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 845023c14eb3..b057c2fa03a4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: MIT
 /*
- * SPDX-License-Identifier: MIT
- *
  * Copyright © 2008,2010 Intel Corporation
  */
 
@@ -9,10 +8,10 @@
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
+#include <drm/drm_auth.h>
+#include <drm/drm_print.h>
 #include <drm/drm_syncobj.h>
 
-#include "display/intel_frontbuffer.h"
-
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
@@ -30,6 +29,7 @@
 #include "i915_gem_context.h"
 #include "i915_gem_evict.h"
 #include "i915_gem_ioctls.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -53,13 +53,13 @@ enum {
 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
 };
 
-/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
-#define __EXEC_OBJECT_HAS_PIN		BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
-#define __EXEC_OBJECT_USERPTR_INIT	BIT(28)
-#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
-#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above + */
+/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */
+#define __EXEC_OBJECT_HAS_PIN		BIT(29)
+#define __EXEC_OBJECT_HAS_FENCE		BIT(28)
+#define __EXEC_OBJECT_USERPTR_INIT	BIT(27)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(26)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(25)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 25) /* all of the above + */
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
@@ -143,7 +143,7 @@ enum {
  * we want to leave the object where it is and for all the existing relocations
  * to match. If the object is given a new address, or if userspace thinks the
  * object is elsewhere, we have to parse all the relocation entries and update
- * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that
+ * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
  * all the target addresses in all of its objects match the value in the
  * relocation entries and that they all match the presumed offsets given by the
  * list of execbuffer objects. Using this knowledge, we know that if we haven't
@@ -183,7 +183,7 @@ enum {
  * the object. Simple! ... The relocation entries are stored in user memory
  * and so to access them we have to copy them into a local buffer. That copy
  * has to avoid taking any pagefaults as they may lead back to a GEM object
- * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
+ * requiring the vm->mutex (i.e. recursive deadlock). So once again we split
 * the relocation into multiple passes. First we try to do everything within an
 * atomic context (avoid the pagefaults) which requires that we never wait. If
 * we detect that we may wait, or if we need to fault, then we have to fallback
@@ -252,6 +252,8 @@ struct i915_execbuffer {
 	struct intel_gt *gt; /* gt for the execbuf */
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
+	intel_wakeref_t wakeref;
+	intel_wakeref_t wakeref_gt0;
 
 	/** our requests to build */
 	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -301,7 +303,7 @@ struct i915_execbuffer {
 	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
 
 	/**
-	 * Indicate either the size of the hastable used to resolve
+	 * Indicate either the size of the hashtable used to resolve
 	 * relocation handles, or if negative that we are using a direct
 	 * index into the execobj[].
 	 */
@@ -320,7 +322,7 @@ static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
 static void eb_unpin_engine(struct i915_execbuffer *eb);
 static void eb_capture_release(struct i915_execbuffer *eb);
 
-static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+static bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
 	return intel_engine_requires_cmd_parser(eb->context->engine) ||
 		(intel_engine_using_cmd_parser(eb->context->engine) &&
@@ -336,7 +338,7 @@ static int eb_create(struct i915_execbuffer *eb)
 		 * Without a 1:1 association between relocation handles and
 		 * the execobject[] index, we instead create a hashtable.
 		 * We size it dynamically based on available memory, starting
-		 * first with 1:1 assocative hash and scaling back until
+		 * first with 1:1 associative hash and scaling back until
 		 * the allocation succeeds.
 		 *
 		 * Later on we use a positive lut_size to indicate we are
@@ -378,22 +380,25 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 		 const struct i915_vma *vma,
 		 unsigned int flags)
 {
-	if (vma->node.size < entry->pad_to_size)
+	const u64 start = i915_vma_offset(vma);
+	const u64 size = i915_vma_size(vma);
+
+	if (size < entry->pad_to_size)
 		return true;
 
-	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
+	if (entry->alignment && !IS_ALIGNED(start, entry->alignment))
 		return true;
 
 	if (flags & EXEC_OBJECT_PINNED &&
-	    vma->node.start != entry->offset)
+	    start != entry->offset)
 		return true;
 
 	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
-	    vma->node.start < BATCH_OFFSET_BIAS)
+	    start < BATCH_OFFSET_BIAS)
 		return true;
 
 	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
-	    (vma->node.start + vma->node.size + 4095) >> 32)
+	    (start + size + 4095) >> 32)
 		return true;
 
 	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
@@ -429,7 +434,7 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
 	return pin_flags;
 }
 
-static inline int
+static int
 eb_pin_vma(struct i915_execbuffer *eb,
 	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct eb_vma *ev)
@@ -439,7 +444,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	int err;
 
 	if (vma->node.size)
-		pin_flags = vma->node.start;
+		pin_flags = __i915_vma_offset(vma);
 	else
 		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
@@ -482,7 +487,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	return 0;
 }
 
-static inline void
+static void
 eb_unreserve_vma(struct eb_vma *ev)
 {
 	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
@@ -544,7 +549,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	return 0;
 }
 
-static inline bool
+static bool
 is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
 {
 	return eb->args->flags & I915_EXEC_BATCH_FIRST ?
@@ -624,8 +629,8 @@ eb_add_vma(struct i915_execbuffer *eb,
 	return 0;
 }
 
-static inline int use_cpu_reloc(const struct reloc_cache *cache,
-				const struct drm_i915_gem_object *obj)
+static int use_cpu_reloc(const struct reloc_cache *cache,
+			 const struct drm_i915_gem_object *obj)
 {
 	if (!i915_gem_object_has_struct_page(obj))
 		return false;
@@ -636,9 +641,15 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
 		return false;
 
+	/*
+	 * For objects created by userspace through GEM_CREATE with pat_index
+	 * set by set_pat extension, i915_gem_object_has_cache_level() always
+	 * return true, otherwise the call would fall back to checking whether
+	 * the object is un-cached.
+	 */
 	return (cache->has_llc ||
 		obj->cache_dirty ||
-		obj->cache_level != I915_CACHE_NONE);
+		!i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
 }
 
 static int eb_reserve_vma(struct i915_execbuffer *eb,
@@ -662,8 +673,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
 	if (err)
 		return err;
 
-	if (entry->offset != vma->node.start) {
-		entry->offset = vma->node.start | UPDATE;
+	if (entry->offset != i915_vma_offset(vma)) {
+		entry->offset = i915_vma_offset(vma) | UPDATE;
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
@@ -726,35 +737,71 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	struct eb_vma *ev;
 	unsigned int pass;
 	int err = 0;
-	bool unpinned;
 
 	/*
-	 * Attempt to pin all of the buffers into the GTT.
-	 * This is done in 2 phases:
+	 * We have one more buffers that we couldn't bind, which could be due to
+	 * various reasons. To resolve this we have 4 passes, with every next
+	 * level turning the screws tighter:
 	 *
-	 * 1. Unbind all objects that do not match the GTT constraints for
-	 *    the execbuffer (fenceable, mappable, alignment etc).
-	 * 2. Bind new objects.
+	 * 0. Unbind all objects that do not match the GTT constraints for the
+	 * execbuffer (fenceable, mappable, alignment etc). Bind all new
+	 * objects. This avoids unnecessary unbinding of later objects in order
+	 * to make room for the earlier objects *unless* we need to defragment.
 	 *
-	 * This avoid unnecessary unbinding of later objects in order to make
-	 * room for the earlier objects *unless* we need to defragment.
+	 * 1. Reorder the buffers, where objects with the most restrictive
+	 * placement requirements go first (ignoring fixed location buffers for
+	 * now). For example, objects needing the mappable aperture (the first
+	 * 256M of GTT), should go first vs objects that can be placed just
+	 * about anywhere. Repeat the previous pass.
 	 *
-	 * Defragmenting is skipped if all objects are pinned at a fixed location.
+	 * 2. Consider buffers that are pinned at a fixed location. Also try to
+	 * evict the entire VM this time, leaving only objects that we were
+	 * unable to lock. Try again to bind the buffers. (still using the new
+	 * buffer order).
+	 *
+	 * 3. We likely have object lock contention for one or more stubborn
+	 * objects in the VM, for which we need to evict to make forward
+	 * progress (perhaps we are fighting the shrinker?). When evicting the
+	 * VM this time around, anything that we can't lock we now track using
+	 * the busy_bo, using the full lock (after dropping the vm->mutex to
+	 * prevent deadlocks), instead of trylock. We then continue to evict the
+	 * VM, this time with the stubborn object locked, which we can now
+	 * hopefully unbind (if still bound in the VM). Repeat until the VM is
+	 * evicted. Finally we should be able bind everything.
 	 */
-	for (pass = 0; pass <= 2; pass++) {
+	for (pass = 0; pass <= 3; pass++) {
 		int pin_flags = PIN_USER | PIN_VALIDATE;
 
 		if (pass == 0)
 			pin_flags |= PIN_NONBLOCK;
 
 		if (pass >= 1)
-			unpinned = eb_unbind(eb, pass == 2);
+			eb_unbind(eb, pass >= 2);
 
 		if (pass == 2) {
 			err = mutex_lock_interruptible(&eb->context->vm->mutex);
 			if (!err) {
-				err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+				err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
+				mutex_unlock(&eb->context->vm->mutex);
+			}
+			if (err)
+				return err;
+		}
+
+		if (pass == 3) {
+retry:
+			err = mutex_lock_interruptible(&eb->context->vm->mutex);
+			if (!err) {
+				struct drm_i915_gem_object *busy_bo = NULL;
+
+				err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
 				mutex_unlock(&eb->context->vm->mutex);
+				if (err && busy_bo) {
+					err = i915_gem_object_lock(busy_bo, &eb->ww);
+					i915_gem_object_put(busy_bo);
+					if (!err)
+						goto retry;
+				}
 			}
 			if (err)
 				return err;
@@ -778,7 +825,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	struct i915_gem_context *ctx;
 
 	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
-	if (unlikely(IS_ERR(ctx)))
+	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
 	eb->gem_context = ctx;
@@ -868,7 +915,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
 		 */
 		if (i915_gem_context_uses_protected_content(eb->gem_context) &&
 		    i915_gem_object_is_protected(obj)) {
-			err = intel_pxp_key_check(&vm->gt->pxp, obj, true);
+			err = intel_pxp_key_check(intel_bo_to_drm_bo(obj), true);
 			if (err) {
 				i915_gem_object_put(obj);
 				return ERR_PTR(err);
@@ -983,8 +1030,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
 			return err;
 
 		if (!err) {
-			if (entry->offset != vma->node.start) {
-				entry->offset = vma->node.start | UPDATE;
+			if (entry->offset != i915_vma_offset(vma)) {
+				entry->offset = i915_vma_offset(vma) | UPDATE;
 				eb->args->flags |= __EXEC_HAS_RELOC;
 			}
 		} else {
@@ -1061,11 +1108,11 @@ static void eb_destroy(const struct i915_execbuffer *eb)
 		kfree(eb->buckets);
 }
 
-static inline u64
+static u64
 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 		  const struct i915_vma *target)
 {
-	return gen8_canonical_addr((int)reloc->delta + target->node.start);
+	return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target));
 }
 
 static void reloc_cache_init(struct reloc_cache *cache,
@@ -1082,19 +1129,19 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->node.flags = 0;
 }
 
-static inline void *unmask_page(unsigned long p)
+static void *unmask_page(unsigned long p)
 {
 	return (void *)(uintptr_t)(p & PAGE_MASK);
 }
 
-static inline unsigned int unmask_flags(unsigned long p)
+static unsigned int unmask_flags(unsigned long p)
 {
 	return p & ~PAGE_MASK;
 }
 
 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
 
-static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+static struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 {
 	struct drm_i915_private *i915 =
 		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
@@ -1110,7 +1157,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache)
 
 	vaddr = unmask_page(cache->vaddr);
 	if (cache->vaddr & KMAP)
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 	else
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 }
@@ -1126,7 +1173,7 @@ static void reloc_cache_remap(struct reloc_cache *cache,
 	if (cache->vaddr & KMAP) {
 		struct page *page = i915_gem_object_get_page(obj, cache->page);
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
 		cache->vaddr = unmask_flags(cache->vaddr) |
 			(unsigned long)vaddr;
 	} else {
@@ -1156,7 +1203,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer
 		if (cache->vaddr & CLFLUSH_AFTER)
 			mb();
 
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 		i915_gem_object_finish_access(obj);
 	} else {
 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1188,7 +1235,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	struct page *page;
 
 	if (cache->vaddr) {
-		kunmap_atomic(unmask_page(cache->vaddr));
+		kunmap_local(unmask_page(cache->vaddr));
 	} else {
 		unsigned int flushes;
 		int err;
@@ -1210,7 +1257,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	if (!obj->mm.dirty)
 		set_page_dirty(page);
 
-	vaddr = kmap_atomic(page);
+	vaddr = kmap_local_page(page);
 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 	cache->page = pageno;
 
@@ -1274,7 +1321,7 @@ static void *reloc_iomap(struct i915_vma *batch,
 			if (err) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
-			cache->node.start = vma->node.start;
+			cache->node.start = i915_ggtt_offset(vma);
 			cache->node.mm = (void *)vma;
 		}
 	}
@@ -1283,7 +1330,10 @@ static void *reloc_iomap(struct i915_vma *batch,
 	if (drm_mm_node_allocated(&cache->node)) {
 		ggtt->vm.insert_page(&ggtt->vm,
 				     i915_gem_object_get_dma_address(obj, page),
-				     offset, I915_CACHE_NONE, 0);
+				     offset,
+				     i915_gem_get_pat_index(ggtt->vm.i915,
+							    I915_CACHE_NONE),
+				     0);
 	} else {
 		offset += page << PAGE_SHIFT;
 	}
@@ -1333,8 +1383,9 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 		 */
 		if (flushes & CLFLUSH_AFTER)
 			drm_clflush_virt_range(addr, sizeof(*addr));
-	} else
+	} else {
 		*addr = value;
+	}
 }
 
 static u64
@@ -1387,7 +1438,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 		drm_dbg(&i915->drm, "reloc with multiple write domains: "
 			"target %d offset %d "
-			"read %08x write %08x",
+			"read %08x write %08x\n",
 			reloc->target_handle,
 			(int) reloc->offset,
 			reloc->read_domains,
@@ -1398,7 +1449,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 		      & ~I915_GEM_GPU_DOMAINS)) {
 		drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
 			"target %d offset %d "
-			"read %08x write %08x",
+			"read %08x write %08x\n",
 			reloc->target_handle,
 			(int) reloc->offset,
 			reloc->read_domains,
@@ -1423,7 +1474,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 			reloc_cache_unmap(&eb->reloc_cache);
 			mutex_lock(&vma->vm->mutex);
 			err = i915_vma_bind(target->vma,
-					    target->vma->obj->cache_level,
+					    target->vma->obj->pat_index,
 					    PIN_GLOBAL, NULL, NULL);
 			mutex_unlock(&vma->vm->mutex);
 			reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
@@ -1437,7 +1488,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * more work needs to be done.
 	 */
 	if (!DBG_FORCE_RELOC &&
-	    gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+	    gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
@@ -1481,7 +1532,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 		u64_to_user_ptr(entry->relocs_ptr);
 	unsigned long remain = entry->relocation_count;
 
-	if (unlikely(remain > N_RELOC(ULONG_MAX)))
+	if (unlikely(remain > N_RELOC(INT_MAX)))
 		return -EINVAL;
 
 	/*
@@ -1518,36 +1569,36 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 		do {
 			u64 offset = eb_relocate_entry(eb, ev, r);
 
-			if (likely(offset == 0)) {
-			} else if ((s64)offset < 0) {
+			if (likely(offset == 0))
+				continue;
+
+			if ((s64)offset < 0) {
 				remain = (int)offset;
 				goto out;
-			} else {
-				/*
-				 * Note that reporting an error now
-				 * leaves everything in an inconsistent
-				 * state as we have *already* changed
-				 * the relocation value inside the
-				 * object. As we have not changed the
-				 * reloc.presumed_offset or will not
-				 * change the execobject.offset, on the
-				 * call we may not rewrite the value
-				 * inside the object, leaving it
-				 * dangling and causing a GPU hang. Unless
-				 * userspace dynamically rebuilds the
-				 * relocations on each execbuf rather than
-				 * presume a static tree.
-				 *
-				 * We did previously check if the relocations
-				 * were writable (access_ok), an error now
-				 * would be a strange race with mprotect,
-				 * having already demonstrated that we
-				 * can read from this userspace address.
-				 */
-				offset = gen8_canonical_addr(offset & ~UPDATE);
-				__put_user(offset,
-					   &urelocs[r - stack].presumed_offset);
 			}
+			/*
+			 * Note that reporting an error now
+			 * leaves everything in an inconsistent
+			 * state as we have *already* changed
+			 * the relocation value inside the
+			 * object. As we have not changed the
+			 * reloc.presumed_offset or will not
+			 * change the execobject.offset, on the
+			 * call we may not rewrite the value
+			 * inside the object, leaving it
+			 * dangling and causing a GPU hang. Unless
+			 * userspace dynamically rebuilds the
+			 * relocations on each execbuf rather than
+			 * presume a static tree.
+			 *
+			 * We did previously check if the relocations
+			 * were writable (access_ok), an error now
+			 * would be a strange race with mprotect,
+			 * having already demonstrated that we
+			 * can read from this userspace address.
+			 */
+			offset = gen8_canonical_addr(offset & ~UPDATE);
+			__put_user(offset, &urelocs[r - stack].presumed_offset);
 		} while (r++, --count);
 		urelocs += ARRAY_SIZE(stack);
 	} while (remain);
@@ -1589,7 +1640,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
 	if (size == 0)
 		return 0;
 
-	if (size > N_RELOC(ULONG_MAX))
+	if (size > N_RELOC(INT_MAX))
 		return -EINVAL;
 
 	addr = u64_to_user_ptr(entry->relocs_ptr);
@@ -1629,7 +1680,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 
 		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
 		size = nreloc * sizeof(*relocs);
-		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+		relocs = kvmalloc_array(1, size, GFP_KERNEL);
 		if (!relocs) {
 			err = -ENOMEM;
 			goto err;
@@ -2101,18 +2152,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 						       eb->composite_fence ?
 						       eb->composite_fence :
 						       &eb->requests[j]->fence,
-						       flags | __EXEC_OBJECT_NO_RESERVE);
+						       flags | __EXEC_OBJECT_NO_RESERVE |
+						       __EXEC_OBJECT_NO_REQUEST_AWAIT);
 		}
 	}
 
 #ifdef CONFIG_MMU_NOTIFIER
 	if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
-		read_lock(&eb->i915->mm.notifier_lock);
-
-		/*
-		 * count is always at least 1, otherwise __EXEC_USERPTR_USED
-		 * could not have been set
-		 */
 		for (i = 0; i < count; i++) {
 			struct eb_vma *ev = &eb->vma[i];
 			struct drm_i915_gem_object *obj = ev->vma->obj;
@@ -2124,8 +2170,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 			if (err)
 				break;
 		}
-
-		read_unlock(&eb->i915->mm.notifier_lock);
 	}
 #endif
 
@@ -2148,7 +2192,8 @@ err_skip:
 	return err;
 }
 
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+				     struct drm_i915_gem_execbuffer2 *exec)
 {
 	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
 		return -EINVAL;
@@ -2161,7 +2206,7 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 	}
 
 	if (exec->DR4 == 0xffffffff) {
-		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+		drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n");
 		exec->DR4 = 0;
 	}
 	if (exec->DR1 || exec->DR4)
@@ -2178,8 +2223,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 	u32 *cs;
 	int i;
 
-	if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) {
-		drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
+	if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) {
+		drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n");
 		return -EINVAL;
 	}
 
@@ -2365,7 +2410,7 @@ static int eb_request_submit(struct i915_execbuffer *eb,
 	}
 
 	err = rq->context->engine->emit_bb_start(rq,
-						 batch->node.start +
+						 i915_vma_offset(batch) +
 						 eb->batch_start_offset,
 						 batch_len,
 						 eb->batch_flags);
@@ -2376,7 +2421,7 @@ static int eb_request_submit(struct i915_execbuffer *eb,
 		GEM_BUG_ON(intel_context_is_parallel(rq->context));
 		GEM_BUG_ON(eb->batch_start_offset);
 		err = rq->context->engine->emit_bb_start(rq,
-							 eb->trampoline->node.start +
+							 i915_vma_offset(eb->trampoline) +
 							 batch_len, 0, 0);
 		if (err)
 			return err;
@@ -2406,17 +2451,12 @@ static int eb_submit(struct i915_execbuffer *eb)
 	return err;
 }
 
-static int num_vcs_engines(struct drm_i915_private *i915)
-{
-	return hweight_long(VDBOX_MASK(to_gt(i915)));
-}
-
 /*
  * Find one BSD ring to dispatch the corresponding BSD command.
 * The engine index is returned.
 */
 static unsigned int
-gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
+gen8_dispatch_bsd_engine(struct drm_i915_private *i915,
 			 struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
@@ -2424,7 +2464,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 	/* Check whether the file_priv has already selected one ring. */
 	if ((int)file_priv->bsd_engine < 0)
 		file_priv->bsd_engine =
-			prandom_u32_max(num_vcs_engines(dev_priv));
+			get_random_u32_below(i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
 
 	return file_priv->bsd_engine;
 }
@@ -2504,7 +2544,7 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
 
 			/*
 			 * Error path, cannot use intel_context_timeline_lock as
-			 * that is user interruptable and this clean up step
+			 * that is user interruptible and this clean up step
 			 * must be done.
 			 */
 			mutex_lock(&ce->timeline->mutex);
@@ -2612,7 +2652,8 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
 		return -1;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
+	if (user_ring_id == I915_EXEC_BSD &&
+	    i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
@@ -2644,6 +2685,7 @@ static int
 eb_select_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce, *child;
+	struct intel_gt *gt;
 	unsigned int idx;
 	int err;
 
@@ -2667,10 +2709,17 @@ eb_select_engine(struct i915_execbuffer *eb)
 		}
 	}
 	eb->num_batches = ce->parallel.number_children + 1;
+	gt = ce->engine->gt;
 
 	for_each_child(ce, child)
 		intel_context_get(child);
-	intel_gt_pm_get(ce->engine->gt);
+	eb->wakeref = intel_gt_pm_get(ce->engine->gt);
+	/*
+	 * Keep GT0 active on MTL so that i915_vma_parked() doesn't
+	 * free VMAs while execbuf ioctl is validating VMAs.
+	 */
+	if (gt->info.id)
+		eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
 		err = intel_context_alloc_state(ce);
@@ -2709,7 +2758,10 @@ eb_select_engine(struct i915_execbuffer *eb)
 	return err;
 
 err:
-	intel_gt_pm_put(ce->engine->gt);
+	if (gt->info.id)
+		intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
+
+	intel_gt_pm_put(ce->engine->gt, eb->wakeref);
 	for_each_child(ce, child)
 		intel_context_put(child);
 	intel_context_put(ce);
@@ -2722,7 +2774,13 @@ eb_put_engine(struct i915_execbuffer *eb)
 	struct intel_context *child;
 
 	i915_vm_put(eb->context->vm);
-	intel_gt_pm_put(eb->gt);
+	/*
+	 * This works in conjunction with eb_select_engine() to prevent
+	 * i915_vma_parked() from interfering while execbuf validates vmas.
+	 */
+	if (eb->gt->info.id)
+		intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
+	intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
 	for_each_child(eb->context, child)
 		intel_context_put(child);
 	intel_context_put(eb->context);
@@ -2799,7 +2857,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
 		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
-			DRM_DEBUG("Invalid syncobj handle provided\n");
+			drm_dbg(&eb->i915->drm,
+				"Invalid syncobj handle provided\n");
 			return -ENOENT;
 		}
 
@@ -2807,7 +2866,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 		fence = drm_syncobj_fence_get(syncobj);
 
 		if (!fence && user_fence.flags &&
 		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-			DRM_DEBUG("Syncobj handle has no fence\n");
+			drm_dbg(&eb->i915->drm,
+				"Syncobj handle has no fence\n");
 			drm_syncobj_put(syncobj);
 			return -EINVAL;
 		}
@@ -2816,7 +2876,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
 		err = dma_fence_chain_find_seqno(&fence, point);
 		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+			drm_dbg(&eb->i915->drm,
+				"Syncobj handle missing requested point %llu\n",
+				point);
 			dma_fence_put(fence);
 			drm_syncobj_put(syncobj);
 			return err;
@@ -2842,7 +2904,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 			 * 0) would break the timeline.
 			 */
 			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
-				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+				drm_dbg(&eb->i915->drm,
+					"Trying to wait & signal the same timeline point.\n");
 				dma_fence_put(fence);
 				drm_syncobj_put(syncobj);
 				return -EINVAL;
@@ -2913,14 +2976,16 @@ static int add_fence_array(struct i915_execbuffer *eb)
 
 		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
-			DRM_DEBUG("Invalid syncobj handle provided\n");
+			drm_dbg(&eb->i915->drm,
+				"Invalid syncobj handle provided\n");
 			return -ENOENT;
 		}
 
 		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
 			fence = drm_syncobj_fence_get(syncobj);
 			if (!fence) {
-				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_dbg(&eb->i915->drm,
+					"Syncobj handle has no fence\n");
 				drm_syncobj_put(syncobj);
 				return -EINVAL;
 			}
@@ -2954,11 +3019,6 @@ await_fence_array(struct i915_execbuffer *eb,
 	int err;
 
 	for (n = 0; n < eb->num_fences; n++) {
-		struct drm_syncobj *syncobj;
-		unsigned int flags;
-
-		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-
 		if (!eb->fences[n].dma_fence)
 			continue;
 
@@ -3441,6 +3501,13 @@ err_request:
 				   eb.composite_fence ?
 				   eb.composite_fence :
 				   &eb.requests[0]->fence);
+	if (unlikely(eb.gem_context->syncobj)) {
+		drm_syncobj_replace_fence(eb.gem_context->syncobj,
+					  eb.composite_fence ?
+					  eb.composite_fence :
+					  &eb.requests[0]->fence);
+	}
+
 	if (out_fence) {
 		if (err == 0) {
 			fd_install(out_fence_fd, out_fence->file);
@@ -3452,13 +3519,6 @@ err_request:
 		}
 	}
 
-	if (unlikely(eb.gem_context->syncobj)) {
-		drm_syncobj_replace_fence(eb.gem_context->syncobj,
-					  eb.composite_fence ?
-					  eb.composite_fence :
-					  &eb.requests[0]->fence);
-	}
-
 	if (!out_fence && eb.composite_fence)
 		dma_fence_put(eb.composite_fence);
 
@@ -3520,7 +3580,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	err = i915_gem_check_execbuffer(args);
+	err = i915_gem_check_execbuffer(i915, args);
 	if (err)
 		return err;
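
One recurring change in the hunks above is that intel_gt_pm_get() now returns an intel_wakeref_t token which the execbuf stores (eb->wakeref, eb->wakeref_gt0) and later hands back to intel_gt_pm_put(). The short, self-contained C sketch below only illustrates that token-style get/put idiom; the fake_* names are hypothetical stand-ins for illustration and are not i915 API.

#include <assert.h>
#include <stdio.h>

/* Hypothetical stand-ins for struct intel_gt and intel_wakeref_t. */
struct fake_gt {
	int wakeref_count; /* how many holders are keeping the GT awake */
};
typedef unsigned long fake_wakeref_t;

/* Acquire: bump the count and return a token the caller must keep. */
static fake_wakeref_t fake_gt_pm_get(struct fake_gt *gt)
{
	gt->wakeref_count++;
	return (fake_wakeref_t)gt;
}

/* Release: the caller passes the token back, mirroring intel_gt_pm_put(gt, wakeref). */
static void fake_gt_pm_put(struct fake_gt *gt, fake_wakeref_t wf)
{
	assert(wf == (fake_wakeref_t)gt);
	gt->wakeref_count--;
}

int main(void)
{
	struct fake_gt gt = { 0 };
	fake_wakeref_t wakeref;

	wakeref = fake_gt_pm_get(&gt);          /* cf. eb->wakeref = intel_gt_pm_get(...) */
	printf("held: %d\n", gt.wakeref_count); /* 1 */
	fake_gt_pm_put(&gt, wakeref);           /* cf. intel_gt_pm_put(..., eb->wakeref) */
	printf("held: %d\n", gt.wakeref_count); /* 0 */
	return 0;
}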
