summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 603
1 files changed, 335 insertions, 268 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3a5b247be738..b057c2fa03a4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1,18 +1,17 @@
+// SPDX-License-Identifier: MIT
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2008,2010 Intel Corporation
*/
-#include <linux/intel-iommu.h>
#include <linux/dma-resv.h>
+#include <linux/highmem.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
+#include <drm/drm_auth.h>
+#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
-#include "display/intel_frontbuffer.h"
-
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
#include "gt/intel_gpu_commands.h"
@@ -23,13 +22,16 @@
#include "pxp/intel_pxp.h"
+#include "i915_cmd_parser.h"
#include "i915_drv.h"
+#include "i915_file_private.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
+#include "i915_gem_evict.h"
#include "i915_gem_ioctls.h"
+#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "i915_vma_snapshot.h"
struct eb_vma {
struct i915_vma *vma;
@@ -51,13 +53,13 @@ enum {
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};
-/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
-#define __EXEC_OBJECT_HAS_PIN BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE BIT(29)
-#define __EXEC_OBJECT_USERPTR_INIT BIT(28)
-#define __EXEC_OBJECT_NEEDS_MAP BIT(27)
-#define __EXEC_OBJECT_NEEDS_BIAS BIT(26)
-#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */
+/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */
+#define __EXEC_OBJECT_HAS_PIN BIT(29)
+#define __EXEC_OBJECT_HAS_FENCE BIT(28)
+#define __EXEC_OBJECT_USERPTR_INIT BIT(27)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(26)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(25)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 25) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
@@ -141,7 +143,7 @@ enum {
* we want to leave the object where it is and for all the existing relocations
* to match. If the object is given a new address, or if userspace thinks the
* object is elsewhere, we have to parse all the relocation entries and update
- * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that
+ * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
* all the target addresses in all of its objects match the value in the
* relocation entries and that they all match the presumed offsets given by the
* list of execbuffer objects. Using this knowledge, we know that if we haven't
@@ -181,7 +183,7 @@ enum {
* the object. Simple! ... The relocation entries are stored in user memory
* and so to access them we have to copy them into a local buffer. That copy
* has to avoid taking any pagefaults as they may lead back to a GEM object
- * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
+ * requiring the vm->mutex (i.e. recursive deadlock). So once again we split
* the relocation into multiple passes. First we try to do everything within an
* atomic context (avoid the pagefaults) which requires that we never wait. If
* we detect that we may wait, or if we need to fault, then we have to fallback
@@ -250,6 +252,8 @@ struct i915_execbuffer {
struct intel_gt *gt; /* gt for the execbuf */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
+ intel_wakeref_t wakeref;
+ intel_wakeref_t wakeref_gt0;
/** our requests to build */
struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -299,7 +303,7 @@ struct i915_execbuffer {
struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
/**
- * Indicate either the size of the hastable used to resolve
+ * Indicate either the size of the hashtable used to resolve
* relocation handles, or if negative that we are using a direct
* index into the execobj[].
*/
@@ -318,7 +322,7 @@ static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
static void eb_capture_release(struct i915_execbuffer *eb);
-static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+static bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
return intel_engine_requires_cmd_parser(eb->context->engine) ||
(intel_engine_using_cmd_parser(eb->context->engine) &&
@@ -334,7 +338,7 @@ static int eb_create(struct i915_execbuffer *eb)
* Without a 1:1 association between relocation handles and
* the execobject[] index, we instead create a hashtable.
* We size it dynamically based on available memory, starting
- * first with 1:1 assocative hash and scaling back until
+ * first with 1:1 associative hash and scaling back until
* the allocation succeeds.
*
* Later on we use a positive lut_size to indicate we are
@@ -376,22 +380,25 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
const struct i915_vma *vma,
unsigned int flags)
{
- if (vma->node.size < entry->pad_to_size)
+ const u64 start = i915_vma_offset(vma);
+ const u64 size = i915_vma_size(vma);
+
+ if (size < entry->pad_to_size)
return true;
- if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
+ if (entry->alignment && !IS_ALIGNED(start, entry->alignment))
return true;
if (flags & EXEC_OBJECT_PINNED &&
- vma->node.start != entry->offset)
+ start != entry->offset)
return true;
if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
- vma->node.start < BATCH_OFFSET_BIAS)
+ start < BATCH_OFFSET_BIAS)
return true;
if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
- (vma->node.start + vma->node.size + 4095) >> 32)
+ (start + size + 4095) >> 32)
return true;
if (flags & __EXEC_OBJECT_NEEDS_MAP &&
@@ -427,7 +434,7 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
return pin_flags;
}
-static inline int
+static int
eb_pin_vma(struct i915_execbuffer *eb,
const struct drm_i915_gem_exec_object2 *entry,
struct eb_vma *ev)
@@ -437,11 +444,11 @@ eb_pin_vma(struct i915_execbuffer *eb,
int err;
if (vma->node.size)
- pin_flags = vma->node.start;
+ pin_flags = __i915_vma_offset(vma);
else
pin_flags = entry->offset & PIN_OFFSET_MASK;
- pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+ pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
@@ -459,17 +466,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
entry->pad_to_size,
entry->alignment,
eb_pin_flags(entry, ev->flags) |
- PIN_USER | PIN_NOEVICT);
+ PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
if (unlikely(err))
return err;
}
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
+ if (unlikely(err))
return err;
- }
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -482,16 +487,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
return 0;
}
-static inline void
+static void
eb_unreserve_vma(struct eb_vma *ev)
{
- if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
- return;
-
if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
__i915_vma_unpin_fence(ev->vma);
- __i915_vma_unpin(ev->vma);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
}
@@ -548,7 +549,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
return 0;
}
-static inline bool
+static bool
is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
{
return eb->args->flags & I915_EXEC_BATCH_FIRST ?
@@ -628,8 +629,8 @@ eb_add_vma(struct i915_execbuffer *eb,
return 0;
}
-static inline int use_cpu_reloc(const struct reloc_cache *cache,
- const struct drm_i915_gem_object *obj)
+static int use_cpu_reloc(const struct reloc_cache *cache,
+ const struct drm_i915_gem_object *obj)
{
if (!i915_gem_object_has_struct_page(obj))
return false;
@@ -640,9 +641,15 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
return false;
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by the set_pat extension, i915_gem_object_has_cache_level() always
+ * returns true; otherwise the call falls back to checking whether
+ * the object is un-cached.
+ */
return (cache->has_llc ||
obj->cache_dirty ||
- obj->cache_level != I915_CACHE_NONE);
+ !i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
}
static int eb_reserve_vma(struct i915_execbuffer *eb,
@@ -666,17 +673,15 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
if (err)
return err;
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start | UPDATE;
+ if (entry->offset != i915_vma_offset(vma)) {
+ entry->offset = i915_vma_offset(vma) | UPDATE;
eb->args->flags |= __EXEC_HAS_RELOC;
}
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
+ if (unlikely(err))
return err;
- }
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -688,85 +693,131 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
return 0;
}
-static int eb_reserve(struct i915_execbuffer *eb)
+static bool eb_unbind(struct i915_execbuffer *eb, bool force)
{
const unsigned int count = eb->buffer_count;
- unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
+ unsigned int i;
struct list_head last;
+ bool unpinned = false;
+
+ /* Resort *all* the objects into priority order */
+ INIT_LIST_HEAD(&eb->unbound);
+ INIT_LIST_HEAD(&last);
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ unsigned int flags = ev->flags;
+
+ if (!force && flags & EXEC_OBJECT_PINNED &&
+ flags & __EXEC_OBJECT_HAS_PIN)
+ continue;
+
+ unpinned = true;
+ eb_unreserve_vma(ev);
+
+ if (flags & EXEC_OBJECT_PINNED)
+ /* Pinned must have their slot */
+ list_add(&ev->bind_link, &eb->unbound);
+ else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+ /* Map require the lowest 256MiB (aperture) */
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ /* Prioritise 4GiB region for restricted bo */
+ list_add(&ev->bind_link, &last);
+ else
+ list_add_tail(&ev->bind_link, &last);
+ }
+
+ list_splice_tail(&last, &eb->unbound);
+ return unpinned;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
struct eb_vma *ev;
- unsigned int i, pass;
+ unsigned int pass;
int err = 0;
/*
- * Attempt to pin all of the buffers into the GTT.
- * This is done in 3 phases:
+ * We have one or more buffers that we couldn't bind, which could be due to
+ * various reasons. To resolve this we have 4 passes, with every next
+ * level turning the screws tighter:
+ *
+ * 0. Unbind all objects that do not match the GTT constraints for the
+ * execbuffer (fenceable, mappable, alignment etc). Bind all new
+ * objects. This avoids unnecessary unbinding of later objects in order
+ * to make room for the earlier objects *unless* we need to defragment.
*
- * 1a. Unbind all objects that do not match the GTT constraints for
- * the execbuffer (fenceable, mappable, alignment etc).
- * 1b. Increment pin count for already bound objects.
- * 2. Bind new objects.
- * 3. Decrement pin count.
+ * 1. Reorder the buffers, where objects with the most restrictive
+ * placement requirements go first (ignoring fixed location buffers for
+ * now). For example, objects needing the mappable aperture (the first
+ * 256M of GTT), should go first vs objects that can be placed just
+ * about anywhere. Repeat the previous pass.
*
- * This avoid unnecessary unbinding of later objects in order to make
- * room for the earlier objects *unless* we need to defragment.
+ * 2. Consider buffers that are pinned at a fixed location. Also try to
+ * evict the entire VM this time, leaving only objects that we were
+ * unable to lock. Try again to bind the buffers. (still using the new
+ * buffer order).
+ *
+ * 3. We likely have object lock contention for one or more stubborn
+ * objects in the VM, for which we need to evict to make forward
+ * progress (perhaps we are fighting the shrinker?). When evicting the
+ * VM this time around, anything that we can't lock we now track using
+ * the busy_bo, using the full lock (after dropping the vm->mutex to
+ * prevent deadlocks), instead of trylock. We then continue to evict the
+ * VM, this time with the stubborn object locked, which we can now
+ * hopefully unbind (if still bound in the VM). Repeat until the VM is
+ * evicted. Finally we should be able to bind everything.
*/
- pass = 0;
- do {
- list_for_each_entry(ev, &eb->unbound, bind_link) {
- err = eb_reserve_vma(eb, ev, pin_flags);
- if (err)
- break;
- }
- if (err != -ENOSPC)
- return err;
+ for (pass = 0; pass <= 3; pass++) {
+ int pin_flags = PIN_USER | PIN_VALIDATE;
- /* Resort *all* the objects into priority order */
- INIT_LIST_HEAD(&eb->unbound);
- INIT_LIST_HEAD(&last);
- for (i = 0; i < count; i++) {
- unsigned int flags;
+ if (pass == 0)
+ pin_flags |= PIN_NONBLOCK;
- ev = &eb->vma[i];
- flags = ev->flags;
- if (flags & EXEC_OBJECT_PINNED &&
- flags & __EXEC_OBJECT_HAS_PIN)
- continue;
-
- eb_unreserve_vma(ev);
+ if (pass >= 1)
+ eb_unbind(eb, pass >= 2);
- if (flags & EXEC_OBJECT_PINNED)
- /* Pinned must have their slot */
- list_add(&ev->bind_link, &eb->unbound);
- else if (flags & __EXEC_OBJECT_NEEDS_MAP)
- /* Map require the lowest 256MiB (aperture) */
- list_add_tail(&ev->bind_link, &eb->unbound);
- else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- /* Prioritise 4GiB region for restricted bo */
- list_add(&ev->bind_link, &last);
- else
- list_add_tail(&ev->bind_link, &last);
+ if (pass == 2) {
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
+ mutex_unlock(&eb->context->vm->mutex);
+ }
+ if (err)
+ return err;
}
- list_splice_tail(&last, &eb->unbound);
-
- switch (pass++) {
- case 0:
- break;
- case 1:
- /* Too fragmented, unbind everything and retry */
- mutex_lock(&eb->context->vm->mutex);
- err = i915_gem_evict_vm(eb->context->vm);
- mutex_unlock(&eb->context->vm->mutex);
+ if (pass == 3) {
+retry:
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ struct drm_i915_gem_object *busy_bo = NULL;
+
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
+ mutex_unlock(&eb->context->vm->mutex);
+ if (err && busy_bo) {
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
+ i915_gem_object_put(busy_bo);
+ if (!err)
+ goto retry;
+ }
+ }
if (err)
return err;
- break;
+ }
- default:
- return -ENOSPC;
+ list_for_each_entry(ev, &eb->unbound, bind_link) {
+ err = eb_reserve_vma(eb, ev, pin_flags);
+ if (err)
+ break;
}
- pin_flags = PIN_USER;
- } while (1);
+ if (err != -ENOSPC)
+ break;
+ }
+
+ return err;
}
static int eb_select_context(struct i915_execbuffer *eb)
@@ -774,7 +825,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
struct i915_gem_context *ctx;
ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
- if (unlikely(IS_ERR(ctx)))
+ if (IS_ERR(ctx))
return PTR_ERR(ctx);
eb->gem_context = ctx;
@@ -864,7 +915,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
*/
if (i915_gem_context_uses_protected_content(eb->gem_context) &&
i915_gem_object_is_protected(obj)) {
- err = intel_pxp_key_check(&vm->gt->pxp, obj, true);
+ err = intel_pxp_key_check(intel_bo_to_drm_bo(obj), true);
if (err) {
i915_gem_object_put(obj);
return ERR_PTR(err);
@@ -979,8 +1030,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
return err;
if (!err) {
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start | UPDATE;
+ if (entry->offset != i915_vma_offset(vma)) {
+ entry->offset = i915_vma_offset(vma) | UPDATE;
eb->args->flags |= __EXEC_HAS_RELOC;
}
} else {
@@ -994,11 +1045,10 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
}
}
- if (!(ev->flags & EXEC_OBJECT_WRITE)) {
- err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
- if (err)
- return err;
- }
+ /* Reserve enough slots to accommodate composite fences */
+ err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
+ if (err)
+ return err;
GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
@@ -1058,11 +1108,11 @@ static void eb_destroy(const struct i915_execbuffer *eb)
kfree(eb->buckets);
}
-static inline u64
+static u64
relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
const struct i915_vma *target)
{
- return gen8_canonical_addr((int)reloc->delta + target->node.start);
+ return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target));
}
static void reloc_cache_init(struct reloc_cache *cache,
@@ -1079,23 +1129,23 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->node.flags = 0;
}
-static inline void *unmask_page(unsigned long p)
+static void *unmask_page(unsigned long p)
{
return (void *)(uintptr_t)(p & PAGE_MASK);
}
-static inline unsigned int unmask_flags(unsigned long p)
+static unsigned int unmask_flags(unsigned long p)
{
return p & ~PAGE_MASK;
}
#define KMAP 0x4 /* after CLFLUSH_FLAGS */
-static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+static struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
{
struct drm_i915_private *i915 =
container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
- return &i915->ggtt;
+ return to_gt(i915)->ggtt;
}
static void reloc_cache_unmap(struct reloc_cache *cache)
@@ -1107,7 +1157,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache)
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP)
- kunmap_atomic(vaddr);
+ kunmap_local(vaddr);
else
io_mapping_unmap_atomic((void __iomem *)vaddr);
}
@@ -1123,7 +1173,7 @@ static void reloc_cache_remap(struct reloc_cache *cache,
if (cache->vaddr & KMAP) {
struct page *page = i915_gem_object_get_page(obj, cache->page);
- vaddr = kmap_atomic(page);
+ vaddr = kmap_local_page(page);
cache->vaddr = unmask_flags(cache->vaddr) |
(unsigned long)vaddr;
} else {
@@ -1153,7 +1203,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer
if (cache->vaddr & CLFLUSH_AFTER)
mb();
- kunmap_atomic(vaddr);
+ kunmap_local(vaddr);
i915_gem_object_finish_access(obj);
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1185,7 +1235,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
struct page *page;
if (cache->vaddr) {
- kunmap_atomic(unmask_page(cache->vaddr));
+ kunmap_local(unmask_page(cache->vaddr));
} else {
unsigned int flushes;
int err;
@@ -1207,17 +1257,18 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
if (!obj->mm.dirty)
set_page_dirty(page);
- vaddr = kmap_atomic(page);
+ vaddr = kmap_local_page(page);
cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
cache->page = pageno;
return vaddr;
}
-static void *reloc_iomap(struct drm_i915_gem_object *obj,
+static void *reloc_iomap(struct i915_vma *batch,
struct i915_execbuffer *eb,
unsigned long page)
{
+ struct drm_i915_gem_object *obj = batch->obj;
struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
@@ -1227,7 +1278,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
} else {
- struct i915_vma *vma;
+ struct i915_vma *vma = ERR_PTR(-ENODEV);
int err;
if (i915_gem_object_is_tiled(obj))
@@ -1240,10 +1291,21 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err)
return ERR_PTR(err);
- vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
+ /*
+ * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
+ * VMA from the object list because we no longer pin.
+ *
+ * Only attempt to pin the batch buffer to ggtt if the current batch
+ * is not inside ggtt, or the batch buffer is not misplaced.
+ */
+ if (!i915_is_ggtt(batch->vm) ||
+ !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ }
+
if (vma == ERR_PTR(-EDEADLK))
return vma;
@@ -1259,7 +1321,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err) /* no inactive aperture space, use cpu reloc */
return NULL;
} else {
- cache->node.start = vma->node.start;
+ cache->node.start = i915_ggtt_offset(vma);
cache->node.mm = (void *)vma;
}
}
@@ -1268,7 +1330,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj, page),
- offset, I915_CACHE_NONE, 0);
+ offset,
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ 0);
} else {
offset += page << PAGE_SHIFT;
}
@@ -1281,7 +1346,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
return vaddr;
}
-static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+static void *reloc_vaddr(struct i915_vma *vma,
struct i915_execbuffer *eb,
unsigned long page)
{
@@ -1293,9 +1358,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
- vaddr = reloc_iomap(obj, eb, page);
+ vaddr = reloc_iomap(vma, eb, page);
if (!vaddr)
- vaddr = reloc_kmap(obj, cache, page);
+ vaddr = reloc_kmap(vma->obj, cache, page);
}
return vaddr;
@@ -1304,10 +1369,8 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
{
if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
- if (flushes & CLFLUSH_BEFORE) {
- clflushopt(addr);
- mb();
- }
+ if (flushes & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(addr, sizeof(*addr));
*addr = value;
@@ -1319,9 +1382,10 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
* to ensure ordering of clflush wrt to the system.
*/
if (flushes & CLFLUSH_AFTER)
- clflushopt(addr);
- } else
+ drm_clflush_virt_range(addr, sizeof(*addr));
+ } else {
*addr = value;
+ }
}
static u64
@@ -1336,7 +1400,7 @@ relocate_entry(struct i915_vma *vma,
void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj, eb,
+ vaddr = reloc_vaddr(vma, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1374,7 +1438,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
drm_dbg(&i915->drm, "reloc with multiple write domains: "
"target %d offset %d "
- "read %08x write %08x",
+ "read %08x write %08x\n",
reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
@@ -1385,7 +1449,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
& ~I915_GEM_GPU_DOMAINS)) {
drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
"target %d offset %d "
- "read %08x write %08x",
+ "read %08x write %08x\n",
reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
@@ -1410,8 +1474,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
reloc_cache_unmap(&eb->reloc_cache);
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
- target->vma->obj->cache_level,
- PIN_GLOBAL, NULL);
+ target->vma->obj->pat_index,
+ PIN_GLOBAL, NULL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
@@ -1424,7 +1488,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* more work needs to be done.
*/
if (!DBG_FORCE_RELOC &&
- gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+ gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset)
return 0;
/* Check that the relocation address is valid... */
@@ -1468,7 +1532,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
u64_to_user_ptr(entry->relocs_ptr);
unsigned long remain = entry->relocation_count;
- if (unlikely(remain > N_RELOC(ULONG_MAX)))
+ if (unlikely(remain > N_RELOC(INT_MAX)))
return -EINVAL;
/*
@@ -1505,36 +1569,36 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
do {
u64 offset = eb_relocate_entry(eb, ev, r);
- if (likely(offset == 0)) {
- } else if ((s64)offset < 0) {
+ if (likely(offset == 0))
+ continue;
+
+ if ((s64)offset < 0) {
remain = (int)offset;
goto out;
- } else {
- /*
- * Note that reporting an error now
- * leaves everything in an inconsistent
- * state as we have *already* changed
- * the relocation value inside the
- * object. As we have not changed the
- * reloc.presumed_offset or will not
- * change the execobject.offset, on the
- * call we may not rewrite the value
- * inside the object, leaving it
- * dangling and causing a GPU hang. Unless
- * userspace dynamically rebuilds the
- * relocations on each execbuf rather than
- * presume a static tree.
- *
- * We did previously check if the relocations
- * were writable (access_ok), an error now
- * would be a strange race with mprotect,
- * having already demonstrated that we
- * can read from this userspace address.
- */
- offset = gen8_canonical_addr(offset & ~UPDATE);
- __put_user(offset,
- &urelocs[r - stack].presumed_offset);
}
+ /*
+ * Note that reporting an error now
+ * leaves everything in an inconsistent
+ * state as we have *already* changed
+ * the relocation value inside the
+ * object. As we have not changed the
+ * reloc.presumed_offset or will not
+ * change the execobject.offset, on the
+ * call we may not rewrite the value
+ * inside the object, leaving it
+ * dangling and causing a GPU hang. Unless
+ * userspace dynamically rebuilds the
+ * relocations on each execbuf rather than
+ * presume a static tree.
+ *
+ * We did previously check if the relocations
+ * were writable (access_ok), an error now
+ * would be a strange race with mprotect,
+ * having already demonstrated that we
+ * can read from this userspace address.
+ */
+ offset = gen8_canonical_addr(offset & ~UPDATE);
+ __put_user(offset, &urelocs[r - stack].presumed_offset);
} while (r++, --count);
urelocs += ARRAY_SIZE(stack);
} while (remain);
@@ -1576,7 +1640,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
if (size == 0)
return 0;
- if (size > N_RELOC(ULONG_MAX))
+ if (size > N_RELOC(INT_MAX))
return -EINVAL;
addr = u64_to_user_ptr(entry->relocs_ptr);
@@ -1616,7 +1680,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
size = nreloc * sizeof(*relocs);
- relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+ relocs = kvmalloc_array(1, size, GFP_KERNEL);
if (!relocs) {
err = -ENOMEM;
goto err;
@@ -1937,11 +2001,10 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
-static void eb_capture_stage(struct i915_execbuffer *eb)
+static int eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count, j;
- struct i915_vma_snapshot *vsnap;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1951,11 +2014,10 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
if (!(flags & EXEC_OBJECT_CAPTURE))
continue;
- vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
- if (!vsnap)
- continue;
+ if (i915_gem_context_is_recoverable(eb->gem_context) &&
+ (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0)))
+ return -EINVAL;
- i915_vma_snapshot_init(vsnap, vma, "user");
for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
@@ -1964,11 +2026,12 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
continue;
capture->next = eb->capture_lists[j];
- capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+ capture->vma_res = i915_vma_resource_get(vma->resource);
eb->capture_lists[j] = capture;
}
- i915_vma_snapshot_put(vsnap);
}
+
+ return 0;
}
/* Commit once we're in the critical path */
@@ -2010,8 +2073,9 @@ static void eb_capture_list_clear(struct i915_execbuffer *eb)
#else
-static void eb_capture_stage(struct i915_execbuffer *eb)
+static int eb_capture_stage(struct i915_execbuffer *eb)
{
+ return 0;
}
static void eb_capture_commit(struct i915_execbuffer *eb)
@@ -2088,18 +2152,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->composite_fence ?
eb->composite_fence :
&eb->requests[j]->fence,
- flags | __EXEC_OBJECT_NO_RESERVE);
+ flags | __EXEC_OBJECT_NO_RESERVE |
+ __EXEC_OBJECT_NO_REQUEST_AWAIT);
}
}
#ifdef CONFIG_MMU_NOTIFIER
if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
- read_lock(&eb->i915->mm.notifier_lock);
-
- /*
- * count is always at least 1, otherwise __EXEC_USERPTR_USED
- * could not have been set
- */
for (i = 0; i < count; i++) {
struct eb_vma *ev = &eb->vma[i];
struct drm_i915_gem_object *obj = ev->vma->obj;
@@ -2111,8 +2170,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
if (err)
break;
}
-
- read_unlock(&eb->i915->mm.notifier_lock);
}
#endif
@@ -2135,7 +2192,8 @@ err_skip:
return err;
}
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+ struct drm_i915_gem_execbuffer2 *exec)
{
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
return -EINVAL;
@@ -2148,7 +2206,7 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
}
if (exec->DR4 == 0xffffffff) {
- DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+ drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n");
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
@@ -2165,8 +2223,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
u32 *cs;
int i;
- if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) {
- drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
+ if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) {
+ drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n");
return -EINVAL;
}
@@ -2198,7 +2256,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return vma;
- err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
if (err)
return ERR_PTR(err);
@@ -2212,7 +2270,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (eb->batch_flags & I915_DISPATCH_SECURE)
- return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
return NULL;
}
@@ -2263,13 +2321,12 @@ static int eb_parse(struct i915_execbuffer *eb)
err = i915_gem_object_lock(pool->obj, &eb->ww);
if (err)
- goto err;
+ return err;
shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
- if (IS_ERR(shadow)) {
- err = PTR_ERR(shadow);
- goto err;
- }
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
intel_gt_buffer_pool_mark_used(pool);
i915_gem_object_set_readonly(shadow->obj);
shadow->private = pool;
@@ -2281,25 +2338,21 @@ static int eb_parse(struct i915_execbuffer *eb)
shadow = shadow_batch_pin(eb, pool->obj,
&eb->gt->ggtt->vm,
PIN_GLOBAL);
- if (IS_ERR(shadow)) {
- err = PTR_ERR(shadow);
- shadow = trampoline;
- goto err_shadow;
- }
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
shadow->private = pool;
eb->batch_flags |= I915_DISPATCH_SECURE;
}
batch = eb_dispatch_secure(eb, shadow);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err_trampoline;
- }
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
- err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
+ err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
if (err)
- goto err_trampoline;
+ return err;
err = intel_engine_cmd_parser(eb->context->engine,
eb->batches[0]->vma,
@@ -2307,7 +2360,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_len[0],
shadow, trampoline);
if (err)
- goto err_unpin_batch;
+ return err;
eb->batches[0] = &eb->vma[eb->buffer_count++];
eb->batches[0]->vma = i915_vma_get(shadow);
@@ -2326,17 +2379,6 @@ secure_batch:
eb->batches[0]->vma = i915_vma_get(batch);
}
return 0;
-
-err_unpin_batch:
- if (batch)
- i915_vma_unpin(batch);
-err_trampoline:
- if (trampoline)
- i915_vma_unpin(trampoline);
-err_shadow:
- i915_vma_unpin(shadow);
-err:
- return err;
}
static int eb_request_submit(struct i915_execbuffer *eb,
@@ -2368,7 +2410,7 @@ static int eb_request_submit(struct i915_execbuffer *eb,
}
err = rq->context->engine->emit_bb_start(rq,
- batch->node.start +
+ i915_vma_offset(batch) +
eb->batch_start_offset,
batch_len,
eb->batch_flags);
@@ -2379,7 +2421,7 @@ static int eb_request_submit(struct i915_execbuffer *eb,
GEM_BUG_ON(intel_context_is_parallel(rq->context));
GEM_BUG_ON(eb->batch_start_offset);
err = rq->context->engine->emit_bb_start(rq,
- eb->trampoline->node.start +
+ i915_vma_offset(eb->trampoline) +
batch_len, 0, 0);
if (err)
return err;
@@ -2409,17 +2451,12 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
-static int num_vcs_engines(struct drm_i915_private *i915)
-{
- return hweight_long(VDBOX_MASK(to_gt(i915)));
-}
-
/*
* Find one BSD ring to dispatch the corresponding BSD command.
* The engine index is returned.
*/
static unsigned int
-gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
+gen8_dispatch_bsd_engine(struct drm_i915_private *i915,
struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
@@ -2427,7 +2464,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
file_priv->bsd_engine =
- get_random_int() % num_vcs_engines(dev_priv);
+ get_random_u32_below(i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
return file_priv->bsd_engine;
}
@@ -2505,9 +2542,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
timeout) < 0) {
i915_request_put(rq);
- tl = intel_context_timeline_lock(ce);
+ /*
+ * Error path, cannot use intel_context_timeline_lock as
+ * that is user interruptible and this clean up step
+ * must be done.
+ */
+ mutex_lock(&ce->timeline->mutex);
intel_context_exit(ce);
- intel_context_timeline_unlock(tl);
+ mutex_unlock(&ce->timeline->mutex);
if (nonblock)
return -EWOULDBLOCK;
@@ -2610,7 +2652,8 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
return -1;
}
- if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
+ if (user_ring_id == I915_EXEC_BSD &&
+ i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
@@ -2642,6 +2685,7 @@ static int
eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce, *child;
+ struct intel_gt *gt;
unsigned int idx;
int err;
@@ -2665,10 +2709,17 @@ eb_select_engine(struct i915_execbuffer *eb)
}
}
eb->num_batches = ce->parallel.number_children + 1;
+ gt = ce->engine->gt;
for_each_child(ce, child)
intel_context_get(child);
- intel_gt_pm_get(ce->engine->gt);
+ eb->wakeref = intel_gt_pm_get(ce->engine->gt);
+ /*
+ * Keep GT0 active on MTL so that i915_vma_parked() doesn't
+ * free VMAs while execbuf ioctl is validating VMAs.
+ */
+ if (gt->info.id)
+ eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
@@ -2691,6 +2742,11 @@ eb_select_engine(struct i915_execbuffer *eb)
if (err)
goto err;
+ if (!i915_vm_tryget(ce->vm)) {
+ err = -ENOENT;
+ goto err;
+ }
+
eb->context = ce;
eb->gt = ce->engine->gt;
@@ -2702,7 +2758,10 @@ eb_select_engine(struct i915_execbuffer *eb)
return err;
err:
- intel_gt_pm_put(ce->engine->gt);
+ if (gt->info.id)
+ intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
+
+ intel_gt_pm_put(ce->engine->gt, eb->wakeref);
for_each_child(ce, child)
intel_context_put(child);
intel_context_put(ce);
@@ -2714,7 +2773,14 @@ eb_put_engine(struct i915_execbuffer *eb)
{
struct intel_context *child;
- intel_gt_pm_put(eb->gt);
+ i915_vm_put(eb->context->vm);
+ /*
+ * This works in conjunction with eb_select_engine() to prevent
+ * i915_vma_parked() from interfering while execbuf validates vmas.
+ */
+ if (eb->gt->info.id)
+ intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
+ intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
for_each_child(eb->context, child)
intel_context_put(child);
intel_context_put(eb->context);
@@ -2791,7 +2857,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
- DRM_DEBUG("Invalid syncobj handle provided\n");
+ drm_dbg(&eb->i915->drm,
+ "Invalid syncobj handle provided\n");
return -ENOENT;
}
@@ -2799,7 +2866,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
if (!fence && user_fence.flags &&
!(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
- DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2808,7 +2876,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
err = dma_fence_chain_find_seqno(&fence, point);
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
- DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle missing requested point %llu\n",
+ point);
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return err;
@@ -2834,7 +2904,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
* 0) would break the timeline.
*/
if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
- DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+ drm_dbg(&eb->i915->drm,
+ "Trying to wait & signal the same timeline point.\n");
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return -EINVAL;
@@ -2905,14 +2976,16 @@ static int add_fence_array(struct i915_execbuffer *eb)
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
- DRM_DEBUG("Invalid syncobj handle provided\n");
+ drm_dbg(&eb->i915->drm,
+ "Invalid syncobj handle provided\n");
return -ENOENT;
}
if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
fence = drm_syncobj_fence_get(syncobj);
if (!fence) {
- DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2946,11 +3019,6 @@ await_fence_array(struct i915_execbuffer *eb,
int err;
for (n = 0; n < eb->num_fences; n++) {
- struct drm_syncobj *syncobj;
- unsigned int flags;
-
- syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-
if (!eb->fences[n].dma_fence)
continue;
@@ -3270,9 +3338,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
* _onstack interface.
*/
if (eb->batches[i]->vma)
- i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
- eb->batches[i]->vma,
- "batch");
+ eb->requests[i]->batch_res =
+ i915_vma_resource_get(eb->batches[i]->vma->resource);
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3409,7 +3476,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
ww_acquire_done(&eb.ww.ctx);
- eb_capture_stage(&eb);
+ err = eb_capture_stage(&eb);
+ if (err)
+ goto err_vma;
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
if (IS_ERR(out_fence)) {
@@ -3432,6 +3501,13 @@ err_request:
eb.composite_fence :
&eb.requests[0]->fence);
+ if (unlikely(eb.gem_context->syncobj)) {
+ drm_syncobj_replace_fence(eb.gem_context->syncobj,
+ eb.composite_fence ?
+ eb.composite_fence :
+ &eb.requests[0]->fence);
+ }
+
if (out_fence) {
if (err == 0) {
fd_install(out_fence_fd, out_fence->file);
@@ -3443,13 +3519,6 @@ err_request:
}
}
- if (unlikely(eb.gem_context->syncobj)) {
- drm_syncobj_replace_fence(eb.gem_context->syncobj,
- eb.composite_fence ?
- eb.composite_fence :
- &eb.requests[0]->fence);
- }
-
if (!out_fence && eb.composite_fence)
dma_fence_put(eb.composite_fence);
@@ -3457,8 +3526,6 @@ err_request:
err_vma:
eb_release_vmas(&eb, true);
- if (eb.trampoline)
- i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
i915_gem_ww_ctx_fini(&eb.ww);
@@ -3513,7 +3580,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- err = i915_gem_check_execbuffer(args);
+ err = i915_gem_check_execbuffer(i915, args);
if (err)
return err;