Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 843
1 file changed, 534 insertions(+), 309 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 8a4e9c1cbf6c..3ce185670ca4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -15,8 +15,8 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
-#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"
@@ -40,6 +40,11 @@ struct eb_vma {
u32 handle;
};
+struct eb_vma_array {
+ struct kref kref;
+ struct eb_vma vma[];
+};
+
enum {
FORCE_CPU_RELOC = 1,
FORCE_GTT_RELOC,
@@ -52,7 +57,6 @@ enum {
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
-#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_INTERNAL_FLAGS (~0u << 31)
@@ -264,7 +268,9 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct i915_vma *target;
struct i915_request *rq;
+ struct i915_vma *rq_vma;
u32 *rq_cmd;
unsigned int rq_size;
} reloc_cache;
@@ -283,6 +289,7 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+ struct eb_vma_array *array;
};
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -292,8 +299,62 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
eb->args->batch_len);
}
+static struct eb_vma_array *eb_vma_array_create(unsigned int count)
+{
+ struct eb_vma_array *arr;
+
+ arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
+ if (!arr)
+ return NULL;
+
+ kref_init(&arr->kref);
+ arr->vma[0].vma = NULL;
+
+ return arr;
+}
+
+static inline void eb_unreserve_vma(struct eb_vma *ev)
+{
+ struct i915_vma *vma = ev->vma;
+
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(vma);
+
+ if (ev->flags & __EXEC_OBJECT_HAS_PIN)
+ __i915_vma_unpin(vma);
+
+ ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_HAS_FENCE);
+}
+
+static void eb_vma_array_destroy(struct kref *kref)
+{
+ struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
+ struct eb_vma *ev = arr->vma;
+
+ while (ev->vma) {
+ eb_unreserve_vma(ev);
+ i915_vma_put(ev->vma);
+ ev++;
+ }
+
+ kvfree(arr);
+}
+
+static void eb_vma_array_put(struct eb_vma_array *arr)
+{
+ kref_put(&arr->kref, eb_vma_array_destroy);
+}
+
static int eb_create(struct i915_execbuffer *eb)
{
+ /* Allocate an extra slot for use by the command parser + sentinel */
+ eb->array = eb_vma_array_create(eb->buffer_count + 2);
+ if (!eb->array)
+ return -ENOMEM;
+
+ eb->vma = eb->array->vma;
+
if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
unsigned int size = 1 + ilog2(eb->buffer_count);
@@ -327,8 +388,10 @@ static int eb_create(struct i915_execbuffer *eb)
break;
} while (--size);
- if (unlikely(!size))
+ if (unlikely(!size)) {
+ eb_vma_array_put(eb->array);
return -ENOMEM;
+ }
eb->lut_size = size;
} else {
@@ -368,6 +431,32 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
return false;
}
+static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
+ unsigned int exec_flags)
+{
+ u64 pin_flags = 0;
+
+ if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+ pin_flags |= PIN_GLOBAL;
+
+ /*
+ * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to the first 4GBs for unflagged objects.
+ */
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ pin_flags |= PIN_ZONE_4G;
+
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+ pin_flags |= PIN_MAPPABLE;
+
+ if (exec_flags & EXEC_OBJECT_PINNED)
+ pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+ else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
+ pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+
+ return pin_flags;
+}
+
static inline bool
eb_pin_vma(struct i915_execbuffer *eb,
const struct drm_i915_gem_exec_object2 *entry,
@@ -385,8 +474,19 @@ eb_pin_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
- if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
- return false;
+ /* Attempt to reuse the current location if available */
+ if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ return false;
+
+ /* Failing that pick any _free_ space if suitable */
+ if (unlikely(i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT)))
+ return false;
+ }
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
if (unlikely(i915_vma_pin_fence(vma))) {
@@ -402,26 +502,6 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
}
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
- GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
- if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
- __i915_vma_unpin_fence(vma);
-
- __i915_vma_unpin(vma);
-}
-
-static inline void
-eb_unreserve_vma(struct eb_vma *ev)
-{
- if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
- return;
-
- __eb_unreserve_vma(ev->vma, ev->flags);
- ev->flags &= ~__EXEC_OBJECT_RESERVED;
-}
-
static int
eb_validate_vma(struct i915_execbuffer *eb,
struct drm_i915_gem_exec_object2 *entry,
@@ -481,7 +561,7 @@ eb_add_vma(struct i915_execbuffer *eb,
GEM_BUG_ON(i915_vma_is_closed(vma));
- ev->vma = i915_vma_get(vma);
+ ev->vma = vma;
ev->exec = entry;
ev->flags = entry->flags;
@@ -547,28 +627,9 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
u64 pin_flags)
{
struct drm_i915_gem_exec_object2 *entry = ev->exec;
- unsigned int exec_flags = ev->flags;
struct i915_vma *vma = ev->vma;
int err;
- if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
- pin_flags |= PIN_GLOBAL;
-
- /*
- * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
- * limit address to the first 4GBs for unflagged objects.
- */
- if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- pin_flags |= PIN_ZONE_4G;
-
- if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
- pin_flags |= PIN_MAPPABLE;
-
- if (exec_flags & EXEC_OBJECT_PINNED)
- pin_flags |= entry->offset | PIN_OFFSET_FIXED;
- else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
- pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-
if (drm_mm_node_allocated(&vma->node) &&
eb_vma_misplaced(entry, vma, ev->flags)) {
err = i915_vma_unbind(vma);
@@ -578,7 +639,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
err = i915_vma_pin(vma,
entry->pad_to_size, entry->alignment,
- pin_flags);
+ eb_pin_flags(entry, ev->flags) | pin_flags);
if (err)
return err;
@@ -587,7 +648,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
eb->args->flags |= __EXEC_HAS_RELOC;
}
- if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
@@ -595,10 +656,10 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
}
if (vma->fence)
- exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+ ev->flags |= __EXEC_OBJECT_HAS_FENCE;
}
- ev->flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+ ev->flags |= __EXEC_OBJECT_HAS_PIN;
GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
return 0;
@@ -728,77 +789,117 @@ static int eb_select_context(struct i915_execbuffer *eb)
return 0;
}
-static int eb_lookup_vmas(struct i915_execbuffer *eb)
+static int __eb_add_lut(struct i915_execbuffer *eb,
+ u32 handle, struct i915_vma *vma)
{
- struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
- struct drm_i915_gem_object *obj;
- unsigned int i, batch;
+ struct i915_gem_context *ctx = eb->gem_context;
+ struct i915_lut_handle *lut;
int err;
- if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
- return -ENOENT;
+ lut = i915_lut_handle_alloc();
+ if (unlikely(!lut))
+ return -ENOMEM;
- INIT_LIST_HEAD(&eb->relocs);
- INIT_LIST_HEAD(&eb->unbound);
+ i915_vma_get(vma);
+ if (!atomic_fetch_inc(&vma->open_count))
+ i915_vma_reopen(vma);
+ lut->handle = handle;
+ lut->ctx = ctx;
+
+ /* Check that the context hasn't been closed in the meantime */
+ err = -EINTR;
+ if (!mutex_lock_interruptible(&ctx->mutex)) {
+ err = -ENOENT;
+ if (likely(!i915_gem_context_is_closed(ctx)))
+ err = radix_tree_insert(&ctx->handles_vma, handle, vma);
+ if (err == 0) { /* And nor has this handle */
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ i915_gem_object_lock(obj);
+ if (idr_find(&eb->file->object_idr, handle) == obj) {
+ list_add(&lut->obj_link, &obj->lut_list);
+ } else {
+ radix_tree_delete(&ctx->handles_vma, handle);
+ err = -ENOENT;
+ }
+ i915_gem_object_unlock(obj);
+ }
+ mutex_unlock(&ctx->mutex);
+ }
+ if (unlikely(err))
+ goto err;
- batch = eb_batch_index(eb);
+ return 0;
- for (i = 0; i < eb->buffer_count; i++) {
- u32 handle = eb->exec[i].handle;
- struct i915_lut_handle *lut;
+err:
+ i915_vma_close(vma);
+ i915_vma_put(vma);
+ i915_lut_handle_free(lut);
+ return err;
+}
+
+static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
+{
+ do {
+ struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ int err;
- vma = radix_tree_lookup(handles_vma, handle);
+ rcu_read_lock();
+ vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
if (likely(vma))
- goto add_vma;
+ vma = i915_vma_tryget(vma);
+ rcu_read_unlock();
+ if (likely(vma))
+ return vma;
obj = i915_gem_object_lookup(eb->file, handle);
- if (unlikely(!obj)) {
- err = -ENOENT;
- goto err_vma;
- }
+ if (unlikely(!obj))
+ return ERR_PTR(-ENOENT);
vma = i915_vma_instance(obj, eb->context->vm, NULL);
if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
+ i915_gem_object_put(obj);
+ return vma;
}
- lut = i915_lut_handle_alloc();
- if (unlikely(!lut)) {
- err = -ENOMEM;
- goto err_obj;
- }
+ err = __eb_add_lut(eb, handle, vma);
+ if (likely(!err))
+ return vma;
- err = radix_tree_insert(handles_vma, handle, vma);
- if (unlikely(err)) {
- i915_lut_handle_free(lut);
- goto err_obj;
- }
+ i915_gem_object_put(obj);
+ if (err != -EEXIST)
+ return ERR_PTR(err);
+ } while (1);
+}
- /* transfer ref to lut */
- if (!atomic_fetch_inc(&vma->open_count))
- i915_vma_reopen(vma);
- lut->handle = handle;
- lut->ctx = eb->gem_context;
+static int eb_lookup_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int batch = eb_batch_index(eb);
+ unsigned int i;
+ int err = 0;
- i915_gem_object_lock(obj);
- list_add(&lut->obj_link, &obj->lut_list);
- i915_gem_object_unlock(obj);
+ INIT_LIST_HEAD(&eb->relocs);
+ INIT_LIST_HEAD(&eb->unbound);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct i915_vma *vma;
+
+ vma = eb_lookup_vma(eb, eb->exec[i].handle);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
-add_vma:
err = eb_validate_vma(eb, &eb->exec[i], vma);
- if (unlikely(err))
- goto err_vma;
+ if (unlikely(err)) {
+ i915_vma_put(vma);
+ break;
+ }
eb_add_vma(eb, i, batch, vma);
}
- return 0;
-
-err_obj:
- i915_gem_object_put(obj);
-err_vma:
eb->vma[i].vma = NULL;
return err;
}
@@ -823,31 +924,13 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
-static void eb_release_vmas(const struct i915_execbuffer *eb)
-{
- const unsigned int count = eb->buffer_count;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- struct eb_vma *ev = &eb->vma[i];
- struct i915_vma *vma = ev->vma;
-
- if (!vma)
- break;
-
- eb->vma[i].vma = NULL;
-
- if (ev->flags & __EXEC_OBJECT_HAS_PIN)
- __eb_unreserve_vma(vma, ev->flags);
-
- i915_vma_put(vma);
- }
-}
-
static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
+ if (eb->array)
+ eb_vma_array_put(eb->array);
+
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -872,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
cache->rq = NULL;
- cache->rq_size = 0;
+ cache->target = NULL;
}
static inline void *unmask_page(unsigned long p)
@@ -894,29 +977,122 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-static void reloc_gpu_flush(struct reloc_cache *cache)
+#define RELOC_TAIL 4
+
+static int reloc_gpu_chain(struct reloc_cache *cache)
+{
+ struct intel_gt_buffer_pool_node *pool;
+ struct i915_request *rq = cache->rq;
+ struct i915_vma *batch;
+ u32 *cmd;
+ int err;
+
+ pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_pool;
+ }
+
+ err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+ if (err)
+ goto out_pool;
+
+ GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
+ cmd = cache->rq_cmd + cache->rq_size;
+ *cmd++ = MI_ARB_CHECK;
+ if (cache->gen >= 8)
+ *cmd++ = MI_BATCH_BUFFER_START_GEN8;
+ else if (cache->gen >= 6)
+ *cmd++ = MI_BATCH_BUFFER_START;
+ else
+ *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+ *cmd++ = lower_32_bits(batch->node.start);
+ *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
+ i915_gem_object_flush_map(cache->rq_vma->obj);
+ i915_gem_object_unpin_map(cache->rq_vma->obj);
+ cache->rq_vma = NULL;
+
+ err = intel_gt_buffer_pool_mark_active(pool, rq);
+ if (err == 0) {
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ }
+ i915_vma_unpin(batch);
+ if (err)
+ goto out_pool;
+
+ cmd = i915_gem_object_pin_map(batch->obj,
+ cache->has_llc ?
+ I915_MAP_FORCE_WB :
+ I915_MAP_FORCE_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_pool;
+ }
+
+ /* Return with batch mapping (cmd) still pinned */
+ cache->rq_cmd = cmd;
+ cache->rq_size = 0;
+ cache->rq_vma = batch;
+
+out_pool:
+ intel_gt_buffer_pool_put(pool);
+ return err;
+}
+
+static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
{
- struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+ return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+}
+
+static int reloc_gpu_flush(struct reloc_cache *cache)
+{
+ struct i915_request *rq;
+ int err;
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+ rq = fetch_and_zero(&cache->rq);
+ if (!rq)
+ return 0;
- __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
- i915_gem_object_unpin_map(obj);
+ if (cache->rq_vma) {
+ struct drm_i915_gem_object *obj = cache->rq_vma->obj;
- intel_gt_chipset_flush(cache->rq->engine->gt);
+ GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+ cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
- i915_request_add(cache->rq);
- cache->rq = NULL;
+ __i915_gem_object_flush_map(obj,
+ 0, sizeof(u32) * cache->rq_size);
+ i915_gem_object_unpin_map(obj);
+ }
+
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ rq->batch->node.start,
+ PAGE_SIZE,
+ reloc_bb_flags(cache));
+ if (err)
+ i915_request_set_error_once(rq, err);
+
+ intel_gt_chipset_flush(rq->engine->gt);
+ i915_request_add(rq);
+
+ return err;
}
static void reloc_cache_reset(struct reloc_cache *cache)
{
void *vaddr;
- if (cache->rq)
- reloc_gpu_flush(cache);
-
if (!cache->vaddr)
return;
@@ -1109,17 +1285,17 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
}
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct i915_vma *vma,
+ struct intel_engine_cs *engine,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
+ pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1132,7 +1308,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto out_pool;
}
- batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+ batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
@@ -1142,26 +1318,32 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_unmap;
- rq = i915_request_create(eb->context);
+ if (engine == eb->context->engine) {
+ rq = i915_request_create(eb->context);
+ } else {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err_unpin;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(eb->context->vm);
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ }
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
}
- err = intel_engine_pool_mark_active(pool, rq);
+ err = intel_gt_buffer_pool_mark_active(pool, rq);
if (err)
goto err_request;
- err = reloc_move_to_gpu(rq, vma);
- if (err)
- goto err_request;
-
- err = eb->engine->emit_bb_start(rq,
- batch->node.start, PAGE_SIZE,
- cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
- if (err)
- goto skip_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -1176,6 +1358,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq = rq;
cache->rq_cmd = cmd;
cache->rq_size = 0;
+ cache->rq_vma = batch;
/* Return with batch mapping (cmd) still pinned */
goto out_pool;
@@ -1189,124 +1372,206 @@ err_unpin:
err_unmap:
i915_gem_object_unpin_map(pool->obj);
out_pool:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
return err;
}
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+ return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
+}
+
static u32 *reloc_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
-
- if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
- reloc_gpu_flush(cache);
+ int err;
if (unlikely(!cache->rq)) {
- int err;
+ struct intel_engine_cs *engine = eb->engine;
- if (!intel_engine_can_store_dword(eb->engine))
- return ERR_PTR(-ENODEV);
+ if (!reloc_can_use_engine(engine)) {
+ engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ if (!engine)
+ return ERR_PTR(-ENODEV);
+ }
- err = __reloc_gpu_alloc(eb, vma, len);
+ err = __reloc_gpu_alloc(eb, engine, len);
if (unlikely(err))
return ERR_PTR(err);
}
+ if (vma != cache->target) {
+ err = reloc_move_to_gpu(cache->rq, vma);
+ if (unlikely(err)) {
+ i915_request_set_error_once(cache->rq, err);
+ return ERR_PTR(err);
+ }
+
+ cache->target = vma;
+ }
+
+ if (unlikely(cache->rq_size + len >
+ PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
+ err = reloc_gpu_chain(cache);
+ if (unlikely(err)) {
+ i915_request_set_error_once(cache->rq, err);
+ return ERR_PTR(err);
+ }
+ }
+
+ GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
cmd = cache->rq_cmd + cache->rq_size;
cache->rq_size += len;
return cmd;
}
-static u64
-relocate_entry(struct i915_vma *vma,
- const struct drm_i915_gem_relocation_entry *reloc,
- struct i915_execbuffer *eb,
- const struct i915_vma *target)
+static inline bool use_reloc_gpu(struct i915_vma *vma)
{
- u64 offset = reloc->offset;
- u64 target_offset = relocation_target(reloc, target);
- bool wide = eb->reloc_cache.use_64bit_reloc;
- void *vaddr;
+ if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+ return true;
- if (!eb->reloc_cache.vaddr &&
- (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
- !dma_resv_test_signaled_rcu(vma->resv, true))) {
- const unsigned int gen = eb->reloc_cache.gen;
- unsigned int len;
- u32 *batch;
- u64 addr;
-
- if (wide)
- len = offset & 7 ? 8 : 5;
- else if (gen >= 4)
- len = 4;
- else
- len = 3;
+ if (DBG_FORCE_RELOC)
+ return false;
- batch = reloc_gpu(eb, vma, len);
- if (IS_ERR(batch))
- goto repeat;
+ return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
- addr = gen8_canonical_addr(vma->node.start + offset);
- if (wide) {
- if (offset & 7) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_offset);
-
- addr = gen8_canonical_addr(addr + 4);
-
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = upper_32_bits(target_offset);
- } else {
- *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_offset);
- *batch++ = upper_32_bits(target_offset);
- }
- } else if (gen >= 6) {
+static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
+{
+ struct page *page;
+ unsigned long addr;
+
+ GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
+
+ page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
+ addr = PFN_PHYS(page_to_pfn(page));
+ GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
+
+ return addr + offset_in_page(offset);
+}
+
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ const unsigned int gen = eb->reloc_cache.gen;
+ unsigned int len;
+ u32 *batch;
+ u64 addr;
+
+ if (gen >= 8)
+ len = offset & 7 ? 8 : 5;
+ else if (gen >= 4)
+ len = 4;
+ else
+ len = 3;
+
+ batch = reloc_gpu(eb, vma, len);
+ if (IS_ERR(batch))
+ return false;
+
+ addr = gen8_canonical_addr(vma->node.start + offset);
+ if (gen >= 8) {
+ if (offset & 7) {
*batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_offset;
- } else if (gen >= 4) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_offset;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_addr);
+
+ addr = gen8_canonical_addr(addr + 4);
+
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = upper_32_bits(target_addr);
} else {
- *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *batch++ = addr;
- *batch++ = target_offset;
+ *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_addr);
+ *batch++ = upper_32_bits(target_addr);
}
-
- goto out;
+ } else if (gen >= 6) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else if (IS_I965G(eb->i915)) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = vma_phys_addr(vma, offset);
+ *batch++ = target_addr;
+ } else if (gen >= 4) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else if (gen >= 3 &&
+ !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
+ *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else {
+ *batch++ = MI_STORE_DWORD_IMM;
+ *batch++ = vma_phys_addr(vma, offset);
+ *batch++ = target_addr;
}
+ return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ if (eb->reloc_cache.vaddr)
+ return false;
+
+ if (!use_reloc_gpu(vma))
+ return false;
+
+ return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct i915_execbuffer *eb,
+ const struct i915_vma *target)
+{
+ u64 target_addr = relocation_target(reloc, target);
+ u64 offset = reloc->offset;
+
+ if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
+
repeat:
- vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
+ vaddr = reloc_vaddr(vma->obj,
+ &eb->reloc_cache,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- clflush_write32(vaddr + offset_in_page(offset),
- lower_32_bits(target_offset),
- eb->reloc_cache.vaddr);
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
- if (wide) {
- offset += sizeof(u32);
- target_offset >>= 32;
- wide = false;
- goto repeat;
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
+ }
}
-out:
return target->node.start | UPDATE;
}
@@ -1411,12 +1676,11 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
- struct drm_i915_gem_relocation_entry __user *urelocs;
const struct drm_i915_gem_exec_object2 *entry = ev->exec;
- unsigned int remain;
+ struct drm_i915_gem_relocation_entry __user *urelocs =
+ u64_to_user_ptr(entry->relocs_ptr);
+ unsigned long remain = entry->relocation_count;
- urelocs = u64_to_user_ptr(entry->relocs_ptr);
- remain = entry->relocation_count;
if (unlikely(remain > N_RELOC(ULONG_MAX)))
return -EINVAL;
@@ -1425,13 +1689,13 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* to read. However, if the array is not writable the user loses
* the updated relocation values.
*/
- if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
+ if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
return -EFAULT;
do {
struct drm_i915_gem_relocation_entry *r = stack;
unsigned int count =
- min_t(unsigned int, remain, ARRAY_SIZE(stack));
+ min_t(unsigned long, remain, ARRAY_SIZE(stack));
unsigned int copied;
/*
@@ -1494,9 +1758,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
{
int err;
- mutex_lock(&eb->gem_context->mutex);
err = eb_lookup_vmas(eb);
- mutex_unlock(&eb->gem_context->mutex);
if (err)
return err;
@@ -1509,15 +1771,20 @@ static int eb_relocate(struct i915_execbuffer *eb)
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
struct eb_vma *ev;
+ int flush;
list_for_each_entry(ev, &eb->relocs, reloc_link) {
err = eb_relocate_vma(eb, ev);
if (err)
- return err;
+ break;
}
+
+ flush = reloc_gpu_flush(&eb->reloc_cache);
+ if (!err)
+ err = flush;
}
- return 0;
+ return err;
}
static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -1597,19 +1864,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
err = i915_vma_move_to_active(vma, eb->request, flags);
i915_vma_unlock(vma);
-
- __eb_unreserve_vma(vma, flags);
- i915_vma_put(vma);
-
- ev->vma = NULL;
+ eb_unreserve_vma(ev);
}
ww_acquire_fini(&acquire);
+ eb_vma_array_put(fetch_and_zero(&eb->array));
+
if (unlikely(err))
goto err_skip;
- eb->exec = NULL;
-
/* Unconditionally flush any chipset caches (for streaming writes). */
intel_gt_chipset_flush(eb->engine->gt);
return 0;
@@ -1784,7 +2047,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
dma_resv_unlock(shadow->resv);
- dma_fence_work_commit(&pw->base);
+ dma_fence_work_commit_imm(&pw->base);
return 0;
err_batch_unlock:
@@ -1804,7 +2067,7 @@ err_free:
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *shadow, *trampoline;
unsigned int len;
int err;
@@ -1827,7 +2090,7 @@ static int eb_parse(struct i915_execbuffer *eb)
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- pool = intel_engine_get_pool(eb->engine, len);
+ pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1861,6 +2124,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
+ eb->vma[eb->buffer_count].vma = NULL;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
@@ -1874,7 +2138,7 @@ err_trampoline:
err_shadow:
i915_vma_unpin(shadow);
err:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
return err;
}
@@ -2318,39 +2582,13 @@ static void eb_request_add(struct i915_execbuffer *eb)
/* Check that the context wasn't destroyed before submission */
if (likely(!intel_context_is_closed(eb->context))) {
attr = eb->gem_context->sched;
-
- /*
- * Boost actual workloads past semaphores!
- *
- * With semaphores we spin on one engine waiting for another,
- * simply to reduce the latency of starting our work when
- * the signaler completes. However, if there is any other
- * work that we could be doing on this engine instead, that
- * is better utilisation and will reduce the overall duration
- * of the current work. To avoid PI boosting a semaphore
- * far in the distance past over useful work, we keep a history
- * of any semaphore use along our dependency chain.
- */
- if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
- attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
- /*
- * Boost priorities to new clients (new request flows).
- *
- * Allow interactive/synchronous clients to jump ahead of
- * the bulk clients. (FQ_CODEL)
- */
- if (list_empty(&rq->sched.signalers_list))
- attr.priority |= I915_PRIORITY_WAIT;
} else {
/* Serialise with context_close via the add_to_timeline */
i915_request_set_error_once(rq, -ENOENT);
__i915_request_skip(rq);
}
- local_bh_disable();
__i915_request_queue(rq, &attr);
- local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
/* Try to clean up the client's timeline after submitting the request */
if (prev)
@@ -2369,7 +2607,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
- struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
struct i915_vma *batch;
int out_fence_fd = -1;
@@ -2386,8 +2623,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
- eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
- eb.vma[0].vma = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2414,30 +2649,22 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
- if (args->flags & I915_EXEC_FENCE_IN) {
+#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
+ if (args->flags & IN_FENCES) {
+ if ((args->flags & IN_FENCES) == IN_FENCES)
+ return -EINVAL;
+
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
return -EINVAL;
}
-
- if (args->flags & I915_EXEC_FENCE_SUBMIT) {
- if (in_fence) {
- err = -EINVAL;
- goto err_in_fence;
- }
-
- exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!exec_fence) {
- err = -EINVAL;
- goto err_in_fence;
- }
- }
+#undef IN_FENCES
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
err = out_fence_fd;
- goto err_exec_fence;
+ goto err_in_fence;
}
}
@@ -2528,14 +2755,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
if (in_fence) {
- err = i915_request_await_dma_fence(eb.request, in_fence);
- if (err < 0)
- goto err_request;
- }
-
- if (exec_fence) {
- err = i915_request_await_execution(eb.request, exec_fence,
- eb.engine->bond_execute);
+ if (args->flags & I915_EXEC_FENCE_SUBMIT)
+ err = i915_request_await_execution(eb.request,
+ in_fence,
+ eb.engine->bond_execute);
+ else
+ err = i915_request_await_dma_fence(eb.request,
+ in_fence);
if (err < 0)
goto err_request;
}
@@ -2563,7 +2789,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
*/
eb.request->batch = batch;
if (batch->private)
- intel_engine_pool_mark_active(batch->private, eb.request);
+ intel_gt_buffer_pool_mark_active(batch->private, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
@@ -2592,10 +2818,8 @@ err_batch_unpin:
i915_vma_unpin(batch);
err_parse:
if (batch->private)
- intel_engine_pool_put(batch->private);
+ intel_gt_buffer_pool_put(batch->private);
err_vma:
- if (eb.exec)
- eb_release_vmas(&eb);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
eb_unpin_engine(&eb);
@@ -2606,8 +2830,6 @@ err_destroy:
err_out_fence:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
-err_exec_fence:
- dma_fence_put(exec_fence);
err_in_fence:
dma_fence_put(in_fence);
return err;
@@ -2615,7 +2837,7 @@ err_in_fence:
static size_t eb_element_size(void)
{
- return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
+ return sizeof(struct drm_i915_gem_exec_object2);
}
static bool check_buffer_count(size_t count)
@@ -2671,7 +2893,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
drm_dbg(&i915->drm,
@@ -2749,8 +2971,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
if (err)
return err;
- /* Allocate an extra slot for use by the command parser */
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
@@ -2818,3 +3039,7 @@ end:;
kvfree(exec2_list);
return err;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif