| author | Dave Airlie <airlied@redhat.com> | 2019-12-27 15:25:04 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2019-12-27 15:25:04 +1000 |
| commit | 3ae3271443b337c1cd421a9b73d51c5c2de52977 (patch) | |
| tree | 2ce6c36149f45e50af5b3c321fa2b5222f9b81ae /drivers/gpu/drm/i915/gem | |
| parent | 5f773e551a3b977013df24d570d486645f326672 (diff) | |
| parent | 3446c63a0f2a691fdc6fffaddc6e0c1285efc80c (diff) | |
Merge tag 'drm-intel-next-2019-12-23' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
i915 features for v5.6:
- Separate hardware and uapi state (Maarten)
- Expose a number of sprite and plane formats (Ville)
- DDC symlink in HDMI connector sysfs directory (Andrzej Pietrasiewicz)
- Improve obj->mm.lock nesting lock annotation (Daniel)
  (Includes lockdep changes)
- Selftest improvements across the board (Chris)
- ICL/TGL VDSC support on DSI (Jani, Vandita)
- TGL DSB fixes (Animesh, Lucas, Tvrtko)
- VBT parsing improvements and fixes (Lucas, Matt, José, Jani, Dan Carpenter)
- Fix LPSS vs. PMIC PWM backlight use on BYT/CHT (Hans)
  (Includes ACPI+MFD changes)
- Display state, crtc, plane code refactoring (Ville)
- Set opregion chpd value to indicate the driver handles hotplug (Hans de Goede)
- DSI updates and fixes, TGL pipe D support, port mapping (José, Jani, Vandita)
- Make HDCP 2.2 support cover CFL (Juston Li)
- Fix CML PCI IDs and ULT (Shawn Lee)
- CMP-V PCH fix (Imre)
- TGL: Add another TGL PCH ID (James)
- EHL/JSL: Add new PCI IDs (James)
- Rename pipe update tracepoints (Ville)
- Fix FBC on GLK+ (Ville)
- GuC fixes and improvements (Daniele, Don Hiatt, Stuart Summers, Matthew Brost)
- Display debugfs improvements (Ville)
- Hotplug/irq fixes (Matt)
- PSR fixes and improvements (José)
- DRM_I915_GEM_MMAP_OFFSET ioctl (Abdiel)
- Static analysis fixes (Colin Ian King)
- Register sysctl path globally (Venkata Sandeep Dhanalakota)
- Introduce new macros for tracing (Venkata Sandeep Dhanalakota)
- Migrate gt towards intel_uncore_read/write (Andi)
- Add rps frequency translation helpers (Andi)
- Fix TGL transcoder clock off sequence (José)
- Fix TGL port A audio (Kai Vehmanen)
- TGL render decompression (DK)
- GEM/GT improvements and fixes across the board (Chris)
- Couple of backmerges (Jani)
Signed-off-by: Dave Airlie <airlied@redhat.com>
# gpg: Signature made Tue 24 Dec 2019 03:20:48 AM AEST
# gpg: using RSA key D398079D26ABEE6F
# gpg: Good signature from "Jani Nikula <jani.nikula@intel.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1565 A65B 77B0 632E 1124 E59C D398 079D 26AB EE6F
# Conflicts:
# drivers/gpu/drm/i915/display/intel_fbc.c
# drivers/gpu/drm/i915/gt/intel_lrc.c
# drivers/gpu/drm/i915/i915_gem.c
From: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/87lfr3rkry.fsf@intel.com
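Among the features merged above, the new DRM_I915_GEM_MMAP_OFFSET ioctl (implemented in i915_gem_mman.c in the diff below) generalises the old GTT-only mmap flow: userspace requests a fake offset tied to a mapping type (GTT, WC, WB or UC) and then performs an ordinary mmap(2) on the DRM fd with that offset. A minimal userspace sketch, assuming a uapi header new enough to carry the ioctl and flag definitions added by this series; map_bo_wc(), drm_fd, handle and size are illustrative names, not part of the patch:

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>

#include <drm/i915_drm.h>

static void *map_bo_wc(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_offset arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.flags = I915_MMAP_OFFSET_WC; /* write-combining CPU mapping */

	/* Ask the kernel for a fake offset naming this (object, type) pair. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
		return MAP_FAILED;

	/* The fake offset is then consumed by an ordinary mmap(2). */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    drm_fd, (off_t)arg.offset);
}
```

Note from the kernel side of the diff that the WC and UC types are rejected with -ENODEV when the CPU lacks PAT support, so a robust caller would fall back to I915_MMAP_OFFSET_WB or a GTT mapping.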
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
31 files changed, 1630 insertions, 931 deletions
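A recurring pattern in the i915_gem_context.c changes below is context_get_vm_rcu(), which centralises the lookup previously open-coded in clone_vm(): dereference an RCU-published pointer, take a reference with kref_get_unless_zero(), then re-check the pointer, because the object may have been freed and its memory reallocated to a different context between the read and the kref. Below is a standalone sketch of that generic RCU+kref pattern, not the driver code itself; struct vm, struct ctx, vm_release() and get_vm_rcu() are illustrative names:

```c
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct vm {
	struct kref ref;
	struct rcu_head rcu;
};

struct ctx {
	struct vm __rcu *vm;
};

static void vm_release(struct kref *ref)
{
	struct vm *vm = container_of(ref, struct vm, ref);

	kfree_rcu(vm, rcu); /* defer the free past any RCU readers */
}

/* Returns a referenced vm (caller must kref_put()), or NULL. */
static struct vm *get_vm_rcu(struct ctx *ctx)
{
	struct vm *vm;

	rcu_read_lock();
	do {
		vm = rcu_dereference(ctx->vm);
		if (!vm)
			break;

		/* A vm whose refcount already hit zero fails here. */
		if (!kref_get_unless_zero(&vm->ref))
			continue;

		/*
		 * The vm may have been freed and its memory reallocated
		 * between the dereference and the kref; only if ctx still
		 * points at the same vm do we know the reference we took
		 * is the one we wanted. Otherwise drop it and retry.
		 */
		if (vm == rcu_access_pointer(ctx->vm))
			break;

		kref_put(&vm->ref, vm_release);
	} while (1);
	rcu_read_unlock();

	return vm;
}
```

The re-check is sound because a successful kref_get_unless_zero() implies a strong memory barrier, as the comment in the patch itself notes, so a pointer that still matches afterwards cannot be a stale reuse.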
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index b9f504ba3b32..34be4c0ee7c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -20,33 +20,31 @@ static void __do_clflush(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); - intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } static int clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); - struct drm_i915_gem_object *obj = fetch_and_zero(&clflush->obj); + struct drm_i915_gem_object *obj = clflush->obj; int err; err = i915_gem_object_pin_pages(obj); if (err) - goto put; + return err; __do_clflush(obj); i915_gem_object_unpin_pages(obj); -put: - i915_gem_object_put(obj); - return err; + return 0; } static void clflush_release(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); - if (clflush->obj) - i915_gem_object_put(clflush->obj); + i915_gem_object_put(clflush->obj); } static const struct dma_fence_work_ops clflush_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 337ba17b1e0e..dc90b044a217 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,7 +69,9 @@ #include <drm/i915_drm.h> +#include "gt/intel_context.h" #include "gt/intel_engine_heartbeat.h" +#include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_lrc_reg.h" #include "gt/intel_ring.h" @@ -169,12 +171,80 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } +static struct i915_address_space * +context_get_vm_rcu(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); + + do { + struct i915_address_space *vm; + + /* + * We do not allow downgrading from full-ppgtt [to a shared + * global gtt], so ctx->vm cannot become NULL. + */ + vm = rcu_dereference(ctx->vm); + if (!kref_get_unless_zero(&vm->ref)) + continue; + + /* + * This ppgtt may have be reallocated between + * the read and the kref, and reassigned to a third + * context. In order to avoid inadvertent sharing + * of this ppgtt with that third context (and not + * src), we have to confirm that we have the same + * ppgtt after passing through the strong memory + * barrier implied by a successful + * kref_get_unless_zero(). + * + * Once we have acquired the current ppgtt of ctx, + * we no longer care if it is released from ctx, as + * it cannot be reallocated elsewhere. 
+ */ + + if (vm == rcu_access_pointer(ctx->vm)) + return rcu_pointer_handoff(vm); + + i915_vm_put(vm); + } while (1); +} + +static void intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx) +{ + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); + RCU_INIT_POINTER(ce->gem_context, ctx); + + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + ce->ring = __intel_context_ring_size(SZ_16K); + + if (rcu_access_pointer(ctx->vm)) { + struct i915_address_space *vm; + + rcu_read_lock(); + vm = context_get_vm_rcu(ctx); /* hmm */ + rcu_read_unlock(); + + i915_vm_put(ce->vm); + ce->vm = vm; + } + + GEM_BUG_ON(ce->timeline); + if (ctx->timeline) + ce->timeline = intel_timeline_get(ctx->timeline); + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + static void __free_engines(struct i915_gem_engines *e, unsigned int count) { while (count--) { if (!e->engines[count]) continue; + RCU_INIT_POINTER(e->engines[count]->gem_context, NULL); intel_context_put(e->engines[count]); } kfree(e); @@ -211,12 +281,14 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES); GEM_BUG_ON(e->engines[engine->legacy_idx]); - ce = intel_context_create(ctx, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) { __free_engines(e, e->num_engines + 1); return ERR_CAST(ce); } + intel_context_set_gem(ce, ctx); + e->engines[engine->legacy_idx] = ce; e->num_engines = max(e->num_engines, engine->legacy_idx); } @@ -236,14 +308,10 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); - kfree(ctx->jump_whitelist); - if (ctx->timeline) intel_timeline_put(ctx->timeline); - kfree(ctx->name); put_pid(ctx->pid); - mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); @@ -389,15 +457,6 @@ static void kill_context(struct i915_gem_context *ctx) struct intel_context *ce; /* - * If we are already banned, it was due to a guilty request causing - * a reset and the entire context being evicted from the GPU. - */ - if (i915_gem_context_is_banned(ctx)) - return; - - i915_gem_context_set_banned(ctx); - - /* * Map the user's engine back to the actual engines; one virtual * engine will be mapped to multiple engines, and using ctx->engine[] * the same engine may be have multiple instances in the user's map. 
@@ -407,6 +466,9 @@ static void kill_context(struct i915_gem_context *ctx) for_each_gem_engine(ce, __context_engines_static(ctx), it) { struct intel_engine_cs *engine; + if (intel_context_set_banned(ce)) + continue; + /* * Check the current active state of this context; if we * are currently executing on the GPU we need to evict @@ -427,11 +489,29 @@ static void kill_context(struct i915_gem_context *ctx) } } +static void set_closed_name(struct i915_gem_context *ctx) +{ + char *s; + + /* Replace '[]' with '<>' to indicate closed in debug prints */ + + s = strrchr(ctx->name, '['); + if (!s) + return; + + *s = '<'; + + s = strchr(s + 1, ']'); + if (s) + *s = '>'; +} + static void context_close(struct i915_gem_context *ctx) { struct i915_address_space *vm; i915_gem_context_set_closed(ctx); + set_closed_name(ctx); mutex_lock(&ctx->mutex); @@ -529,9 +609,6 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - ctx->jump_whitelist = NULL; - ctx->jump_whitelist_cmds = 0; - spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); @@ -661,37 +738,6 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) return ctx; } -static void -destroy_kernel_context(struct i915_gem_context **ctxp) -{ - struct i915_gem_context *ctx; - - /* Keep the context ref so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(ctxp)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - context_close(ctx); - i915_gem_context_free(ctx); -} - -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) -{ - struct i915_gem_context *ctx; - - ctx = i915_gem_create_context(i915, 0); - if (IS_ERR(ctx)) - return ctx; - - i915_gem_context_clear_bannable(ctx); - i915_gem_context_set_persistence(ctx); - ctx->sched.priority = I915_USER_PRIORITY(prio); - - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - return ctx; -} - static void init_contexts(struct i915_gem_contexts *gc) { spin_lock_init(&gc->lock); @@ -701,32 +747,16 @@ static void init_contexts(struct i915_gem_contexts *gc) init_llist_head(&gc->free_list); } -int i915_gem_init_contexts(struct drm_i915_private *i915) +void i915_gem_init__contexts(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; - - /* Reassure ourselves we are only called once */ - GEM_BUG_ON(i915->kernel_context); - init_contexts(&i915->gem.contexts); - - /* lowest priority; idle task */ - ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN); - if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context\n"); - return PTR_ERR(ctx); - } - i915->kernel_context = ctx; - DRM_DEBUG_DRIVER("%s context support initialized\n", DRIVER_CAPS(i915)->has_logical_contexts ? 
"logical" : "fake"); - return 0; } void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - destroy_kernel_context(&i915->kernel_context); flush_work(&i915->gem.contexts.free_work); } @@ -757,12 +787,8 @@ static int gem_context_register(struct i915_gem_context *ctx, mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); - ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", - current->comm, pid_nr(ctx->pid)); - if (!ctx->name) { - ret = -ENOMEM; - goto err_pid; - } + snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", + current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ mutex_lock(&fpriv->context_idr_lock); @@ -771,8 +797,6 @@ static int gem_context_register(struct i915_gem_context *ctx, if (ret >= 0) goto out; - kfree(fetch_and_zero(&ctx->name)); -err_pid: put_pid(fetch_and_zero(&ctx->pid)); out: return ret; @@ -801,7 +825,6 @@ int i915_gem_context_open(struct drm_i915_private *i915, if (err < 0) goto err_ctx; - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); GEM_BUG_ON(err > 0); return 0; @@ -1012,7 +1035,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, return -ENODEV; rcu_read_lock(); - vm = i915_vm_get(ctx->vm); + vm = context_get_vm_rcu(ctx); rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); @@ -1049,7 +1072,7 @@ static void set_ppgtt_barrier(void *data) static int emit_ppgtt_update(struct i915_request *rq, void *data) { - struct i915_address_space *vm = rq->hw_context->vm; + struct i915_address_space *vm = rq->context->vm; struct intel_engine_cs *engine = rq->engine; u32 base = engine->mmio_base; u32 *cs; @@ -1096,9 +1119,6 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) } *cs++ = MI_NOOP; intel_ring_advance(rq, cs); - } else { - /* ppGTT is not part of the legacy context image */ - gen6_ppgtt_pin(i915_vm_to_ppgtt(vm)); } return 0; @@ -1106,10 +1126,20 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) static bool skip_ppgtt_update(struct intel_context *ce, void *data) { + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + return true; + if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); + return false; + + if (!atomic_read(&ce->pin_count)) + return true; + + /* ppGTT is not part of the legacy context image */ + if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm))) + return true; + + return false; } static int set_ppgtt(struct drm_i915_file_private *file_priv, @@ -1217,7 +1247,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) if (!intel_context_is_pinned(ce)) return 0; - rq = i915_request_create(ce->engine->kernel_context); + rq = intel_engine_create_kernel_request(ce->engine); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1485,12 +1515,14 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) } } - ce = intel_execlists_create_virtual(set->ctx, siblings, n); + ce = intel_execlists_create_virtual(siblings, n); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out_siblings; } + intel_context_set_gem(ce, set->ctx); + if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { intel_context_put(ce); err = -EEXIST; @@ -1660,12 +1692,14 @@ set_engines(struct i915_gem_context *ctx, return -ENOENT; } - ce = intel_context_create(ctx, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) { __free_engines(set.engines, n); return PTR_ERR(ce); } + intel_context_set_gem(ce, ctx); + set.engines->engines[n] = ce; } set.engines->num_engines = num_engines; @@ -1806,6 
+1840,44 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } +static void __apply_priority(struct intel_context *ce, void *arg) +{ + struct i915_gem_context *ctx = arg; + + if (!intel_engine_has_semaphores(ce->engine)) + return; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); +} + +static int set_priority(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + ctx->sched.priority = I915_USER_PRIORITY(priority); + context_apply_all(ctx, __apply_priority, ctx); + + return 0; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1852,23 +1924,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, break; case I915_CONTEXT_PARAM_PRIORITY: - { - s64 priority = args->value; - - if (args->size) - ret = -EINVAL; - else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - ret = -ENODEV; - else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - ret = -EINVAL; - else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - ret = -EPERM; - else - ctx->sched.priority = - I915_USER_PRIORITY(priority); - } + ret = set_priority(ctx, args); break; case I915_CONTEXT_PARAM_SSEU: @@ -1948,20 +2004,23 @@ static int clone_engines(struct i915_gem_context *dst, */ if (intel_engine_is_virtual(engine)) clone->engines[n] = - intel_execlists_clone_virtual(dst, engine); + intel_execlists_clone_virtual(engine); else - clone->engines[n] = intel_context_create(dst, engine); + clone->engines[n] = intel_context_create(engine); if (IS_ERR_OR_NULL(clone->engines[n])) { __free_engines(clone, n); goto err_unlock; } + + intel_context_set_gem(clone->engines[n], dst); } clone->num_engines = n; user_engines = i915_gem_context_user_engines(src); i915_gem_context_unlock_engines(src); - free_engines(dst->engines); + /* Serialised by constructor */ + free_engines(__context_engines_static(dst)); RCU_INIT_POINTER(dst->engines, clone); if (user_engines) i915_gem_context_set_user_engines(dst); @@ -1996,7 +2055,8 @@ static int clone_sseu(struct i915_gem_context *dst, unsigned long n; int err; - clone = dst->engines; /* no locking required; sole access */ + /* no locking required; sole access under constructor*/ + clone = __context_engines_static(dst); if (e->num_engines != clone->num_engines) { err = -EINVAL; goto unlock; @@ -2041,47 +2101,21 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_address_space *vm; int err = 0; - rcu_read_lock(); - do { - vm = rcu_dereference(src->vm); - if (!vm) - break; - - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). 
- * - * Once we have acquired the current ppgtt of src, - * we no longer care if it is released from src, as - * it cannot be reallocated elsewhere. - */ - - if (vm == rcu_access_pointer(src->vm)) - break; + if (!rcu_access_pointer(src->vm)) + return 0; - i915_vm_put(vm); - } while (1); + rcu_read_lock(); + vm = context_get_vm_rcu(src); rcu_read_unlock(); - if (vm) { - if (!mutex_lock_interruptible(&dst->mutex)) { - __assign_ppgtt(dst, vm); - mutex_unlock(&dst->mutex); - } else { - err = -EINTR; - } - i915_vm_put(vm); + if (!mutex_lock_interruptible(&dst->mutex)) { + __assign_ppgtt(dst, vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; } + i915_vm_put(vm); return err; } @@ -2167,8 +2201,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, ext_data.fpriv = file->driver_priv; if (client_is_banned(ext_data.fpriv)) { DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, - pid_nr(get_task_pid(current, PIDTYPE_PID))); + current->comm, task_pid_nr(current)); return -EIO; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18e50a769a6e..14f3cc1b7583 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -91,26 +91,6 @@ static inline void i915_gem_context_clear_persistence(struct i915_gem_context *c clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } -static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) -{ - __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - static inline bool i915_gem_context_user_engines(const struct i915_gem_context *ctx) { @@ -129,31 +109,8 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -static inline bool -i915_gem_context_nopreempt(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_NOPREEMPT, &ctx->flags); -} - -static inline void -i915_gem_context_set_nopreempt(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_NOPREEMPT, &ctx->flags); -} - -static inline void -i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx) -{ - clear_bit(CONTEXT_NOPREEMPT, &ctx->flags); -} - -static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) -{ - return !ctx->file_priv; -} - /* i915_gem_context.c */ -int __must_check i915_gem_init_contexts(struct drm_i915_private *i915); +void i915_gem_init__contexts(struct drm_i915_private *i915); void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int i915_gem_context_open(struct drm_i915_private *i915, @@ -178,9 +135,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); - static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 3870dd5daaa0..017ca803ab47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -100,15 +100,6 @@ struct i915_gem_context { */ struct pid *pid; - /** - * @name: arbitrary name - * - * A name is constructed for the context from the creator's process - * name, pid and user handle in order to uniquely identify the - * context in messages. - */ - const char *name; - /** link: place with &drm_i915_private.context_list */ struct list_head link; struct llist_node free_link; @@ -143,11 +134,8 @@ struct i915_gem_context { * @flags: small set of booleans */ unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 -#define CONTEXT_USER_ENGINES 3 -#define CONTEXT_NOPREEMPT 4 +#define CONTEXT_CLOSED 0 +#define CONTEXT_USER_ENGINES 1 struct mutex mutex; @@ -177,12 +165,14 @@ struct i915_gem_context { */ struct radix_tree_root handles_vma; - /** jump_whitelist: Bit array for tracking cmds during cmdparsing - * Guarded by struct_mutex + /** + * @name: arbitrary name, used for user debug + * + * A name is constructed for the context from the creator's process + * name, pid and user handle in order to uniquely identify the + * context in messages. */ - unsigned long *jump_whitelist; - /** jump_whitelist_cmds: No of cmd slots available */ - u32 jump_whitelist_cmds; + char name[TASK_COMM_LEN + 8]; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 9937b4c341f1..0cc40e77bbd2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -12,6 +12,8 @@ #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_vma.h" +#include "i915_gem_lmem.h" +#include "i915_gem_mman.h" static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { @@ -148,9 +150,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->read_domains |= I915_GEM_DOMAIN_GTT; if (write) { + struct i915_vma *vma; + obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = I915_GEM_DOMAIN_GTT; obj->mm.dirty = true; + + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + i915_vma_set_ggtt_write(vma); + spin_unlock(&obj->vma.lock); } i915_gem_object_unpin_pages(obj); @@ -175,138 +185,34 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct i915_vma *vma; int ret; - assert_object_held(obj); - if (obj->cache_level == cache_level) return 0; - /* Inspect the list of currently bound VMA and unbind any that would - * be invalid given the new cache-level. This is principally to - * catch the issue of the CS prefetch crossing page boundaries and - * reading an invalid PTE on older architectures. 
- */ -restart: - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - if (i915_vma_is_pinned(vma)) { - DRM_DEBUG("can not change the cache level of pinned objects\n"); - return -EBUSY; - } - - if (!i915_vma_is_closed(vma) && - i915_gem_valid_gtt_space(vma, cache_level)) - continue; - - ret = i915_vma_unbind(vma); - if (ret) - return ret; - - /* As unbinding may affect other elements in the - * obj->vma_list (due to side-effects from retiring - * an active vma), play safe and restart the iterator. - */ - goto restart; - } + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; - /* We can reuse the existing drm_mm nodes but need to change the - * cache-level on the PTE. We could simply unbind them all and - * rebind with the correct cache-level on next use. However since - * we already have a valid slot, dma mapping, pages etc, we may as - * rewrite the PTE in the belief that doing so tramples upon less - * state and so involves less work. - */ - if (atomic_read(&obj->bind_count)) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); + ret = i915_gem_object_lock_interruptible(obj); + if (ret) + return ret; - /* Before we change the PTE, the GPU must not be accessing it. - * If we wait upon the object, we know that all the bound - * VMA are no longer active. - */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) { - intel_wakeref_t wakeref = - intel_runtime_pm_get(&i915->runtime_pm); - - /* - * Access to snoopable pages through the GTT is - * incoherent and on some machines causes a hard - * lockup. Relinquish the CPU mmaping to force - * userspace to refault in the pages and we can - * then double check if the GTT mapping is still - * valid for that pointer access. - */ - ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex); - if (ret) { - intel_runtime_pm_put(&i915->runtime_pm, - wakeref); - return ret; - } - - if (obj->userfault_count) - __i915_gem_object_release_mmap(obj); - - /* - * As we no longer need a fence for GTT access, - * we can relinquish it now (and so prevent having - * to steal a fence from someone else on the next - * fence request). Note GPU activity would have - * dropped the fence as all snoopable access is - * supposed to be linear. - */ - for_each_ggtt_vma(vma, obj) { - ret = i915_vma_revoke_fence(vma); - if (ret) - break; - } - mutex_unlock(&i915->ggtt.vm.mutex); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - if (ret) - return ret; - } else { - /* - * We either have incoherent backing store and - * so no GTT access or the architecture is fully - * coherent. In such cases, existing GTT mmaps - * ignore the cache bit in the PTE and we can - * rewrite it without confusing the GPU or having - * to force userspace to fault back in its mmaps. 
- */ - } - - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - /* Wait for an earlier async bind, need to rewrite it */ - ret = i915_vma_sync(vma); - if (ret) - return ret; - - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL); - if (ret) - return ret; - } + /* Always invalidate stale cachelines */ + if (obj->cache_level != cache_level) { + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->cache_dirty = true; } - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (i915_vm_has_cache_coloring(vma->vm)) - vma->node.color = cache_level; - } - i915_gem_object_set_cache_coherency(obj, cache_level); - obj->cache_dirty = true; /* Always invalidate stale cachelines */ + i915_gem_object_unlock(obj); - return 0; + /* The cache-level will be applied when each vma is rebound. */ + return i915_gem_object_unbind(obj, + I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_BARRIER); } int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, @@ -387,20 +293,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == level) - goto out; - - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret) - goto out; - - ret = i915_gem_object_lock_interruptible(obj); - if (ret == 0) { - ret = i915_gem_object_set_cache_level(obj, level); - i915_gem_object_unlock(obj); - } + ret = i915_gem_object_set_cache_level(obj, level); out: i915_gem_object_put(obj); @@ -419,10 +312,13 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, unsigned int flags) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; int ret; - assert_object_held(obj); + /* Frame buffer must be in LMEM (no migration yet) */ + if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) + return ERR_PTR(-EINVAL); /* * The display engine is not coherent with the LLC cache on gen6. As @@ -435,7 +331,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ ret = i915_gem_object_set_cache_level(obj, - HAS_WT(to_i915(obj->base.dev)) ? + HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) return ERR_PTR(ret); @@ -462,13 +358,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - __i915_gem_object_flush_for_display(obj); - - /* - * It should now be out of any other write domains, and we can update - * the domain values for our changes. 
- */ - obj->read_domains |= I915_GEM_DOMAIN_GTT; + i915_gem_object_flush_if_display(obj); return vma; } @@ -479,8 +369,11 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + if (!atomic_read(&obj->bind_count)) + return; mutex_lock(&i915->ggtt.vm.mutex); + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -488,6 +381,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + spin_unlock(&obj->vma.lock); mutex_unlock(&i915->ggtt.vm.mutex); if (i915_gem_object_is_shrinkable(obj)) { @@ -664,7 +558,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, i915_gem_object_unlock(obj); if (write_domain) - intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU); + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); out_unpin: i915_gem_object_unpin_pages(obj); @@ -784,7 +678,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, } out: - intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU); + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f0998f1225af..cbd2bcade3c8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,6 +25,7 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" +#include "i915_sw_fence_work.h" #include "i915_trace.h" enum { @@ -228,6 +229,7 @@ struct i915_execbuffer { struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ + struct i915_vma *trampoline; /** trampoline used for chaining */ /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; @@ -253,7 +255,6 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct intel_context *ce; struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; @@ -277,25 +278,6 @@ struct i915_execbuffer { #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) -/* - * Used to convert any address to canonical form. - * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, - * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the - * addresses to be in a canonical form: - * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct - * canonical form [63:48] == [47]." 
- */ -#define GEN8_HIGH_ADDRESS_BIT 47 -static inline u64 gen8_canonical_addr(u64 address) -{ - return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); -} - -static inline u64 gen8_noncanonical_addr(u64 address) -{ - return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); -} - static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->engine) || @@ -748,9 +730,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) unsigned int i, batch; int err; - if (unlikely(i915_gem_context_is_banned(eb->gem_context))) - return -EIO; - INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); @@ -886,9 +865,6 @@ static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->reloc_cache.ce) - intel_context_put(eb->reloc_cache.ce); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -912,7 +888,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - cache->ce = NULL; cache->rq = NULL; cache->rq_size = 0; } @@ -1182,7 +1157,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = intel_context_create_request(cache->ce); + rq = i915_request_create(eb->context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1246,36 +1221,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { int err; - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); - if (!cache->ce) { - struct intel_context *ce; - - /* - * The CS pre-parser can pre-fetch commands across - * memory sync points and starting gen12 it is able to - * pre-fetch across BB_START and BB_END boundaries - * (within the same context). We therefore use a - * separate context gen12+ to guarantee that the reloc - * writes land before the parser gets to the target - * memory location. 
- */ - if (cache->gen >= 12) - ce = intel_context_create(eb->context->gem_context, - eb->engine); - else - ce = intel_context_get(eb->context); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - cache->ce = ce; - } - err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); @@ -1943,15 +1891,15 @@ err_skip: return err; } -static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) - return false; + return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { if (exec->num_cliprects || exec->cliprects_ptr) - return false; + return -EINVAL; } if (exec->DR4 == 0xffffffff) { @@ -1959,12 +1907,12 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) exec->DR4 = 0; } if (exec->DR1 || exec->DR4) - return false; + return -EINVAL; if ((exec->batch_start_offset | exec->batch_len) & 0x7) - return false; + return -EINVAL; - return true; + return 0; } static int i915_reset_gen7_sol_offsets(struct i915_request *rq) @@ -1993,99 +1941,179 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) } static struct i915_vma * -shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +shadow_batch_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + unsigned int flags) { - struct drm_i915_private *dev_priv = eb->i915; - struct i915_vma * const vma = *eb->vma; - struct i915_address_space *vm; - u64 flags; + struct i915_vma *vma; + int err; - /* - * PPGTT backed shadow buffers must be mapped RO, to prevent - * post-scan tampering - */ - if (CMDPARSER_USES_GGTT(dev_priv)) { - flags = PIN_GLOBAL; - vm = &dev_priv->ggtt.vm; - } else if (vma->vm->has_read_only) { - flags = PIN_USER; - vm = vma->vm; - i915_gem_object_set_readonly(obj); - } else { - DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); - return ERR_PTR(-EINVAL); - } + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return vma; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + return ERR_PTR(err); - return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); + return vma; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb) +struct eb_parse_work { + struct dma_fence_work base; + struct intel_engine_cs *engine; + struct i915_vma *batch; + struct i915_vma *shadow; + struct i915_vma *trampoline; + unsigned int batch_offset; + unsigned int batch_length; +}; + +static int __eb_parse(struct dma_fence_work *work) { - struct intel_engine_pool_node *pool; - struct i915_vma *vma; - u64 batch_start; - u64 shadow_batch_start; + struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + + return intel_engine_cmd_parser(pw->engine, + pw->batch, + pw->batch_offset, + pw->batch_length, + pw->shadow, + pw->trampoline); +} + +static const struct dma_fence_work_ops eb_parse_ops = { + .name = "eb_parse", + .work = __eb_parse, +}; + +static int eb_parse_pipeline(struct i915_execbuffer *eb, + struct i915_vma *shadow, + struct i915_vma *trampoline) +{ + struct eb_parse_work *pw; int err; - pool = intel_engine_get_pool(eb->engine, eb->batch_len); - if (IS_ERR(pool)) - return ERR_CAST(pool); + pw = kzalloc(sizeof(*pw), GFP_KERNEL); + if (!pw) + return -ENOMEM; - vma = shadow_batch_pin(eb, pool->obj); - if (IS_ERR(vma)) - goto err; + dma_fence_work_init(&pw->base, &eb_parse_ops); - batch_start = gen8_canonical_addr(eb->batch->node.start) + - 
eb->batch_start_offset; + pw->engine = eb->engine; + pw->batch = eb->batch; + pw->batch_offset = eb->batch_start_offset; + pw->batch_length = eb->batch_len; + pw->shadow = shadow; + pw->trampoline = trampoline; - shadow_batch_start = gen8_canonical_addr(vma->node.start); + dma_resv_lock(pw->batch->resv, NULL); - err = intel_engine_cmd_parser(eb->gem_context, - eb->engine, - eb->batch->obj, - batch_start, - eb->batch_start_offset, - eb->batch_len, - pool->obj, - shadow_batch_start); + err = dma_resv_reserve_shared(pw->batch->resv, 1); + if (err) + goto err_batch_unlock; - if (err) { - i915_vma_unpin(vma); + /* Wait for all writes (and relocs) into the batch to complete */ + err = i915_sw_fence_await_reservation(&pw->base.chain, + pw->batch->resv, NULL, false, + 0, I915_FENCE_GFP); + if (err < 0) + goto err_batch_unlock; + + /* Keep the batch alive and unwritten as we parse */ + dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); + + dma_resv_unlock(pw->batch->resv); + /* Force execution to wait for completion of the parser */ + dma_resv_lock(shadow->resv, NULL); + dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); + dma_resv_unlock(shadow->resv); + + dma_fence_work_commit(&pw->base); + return 0; + +err_batch_unlock: + dma_resv_unlock(pw->batch->resv); + kfree(pw); + return err; +} + +static int eb_parse(struct i915_execbuffer *eb) +{ + struct intel_engine_pool_node *pool; + struct i915_vma *shadow, *trampoline; + unsigned int len; + int err; + + if (!eb_use_cmdparser(eb)) + return 0; + + len = eb->batch_len; + if (!CMDPARSER_USES_GGTT(eb->i915)) { /* - * Unsafe GGTT-backed buffers can still be submitted safely - * as non-secure. - * For PPGTT backing however, we have no choice but to forcibly - * reject unsafe buffers + * ppGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering */ - if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) - /* Execute original buffer non-secure */ - vma = NULL; - else - vma = ERR_PTR(err); + if (!eb->context->vm->has_read_only) { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return -EINVAL; + } + } else { + len += I915_CMD_PARSER_TRAMPOLINE_SIZE; + } + + pool = intel_engine_get_pool(eb->engine, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); goto err; } + i915_gem_object_set_readonly(shadow->obj); + + trampoline = NULL; + if (CMDPARSER_USES_GGTT(eb->i915)) { + trampoline = shadow; + + shadow = shadow_batch_pin(pool->obj, + &eb->engine->gt->ggtt->vm, + PIN_GLOBAL); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); + shadow = trampoline; + goto err_shadow; + } + + eb->batch_flags |= I915_DISPATCH_SECURE; + } + + err = eb_parse_pipeline(eb, shadow, trampoline); + if (err) + goto err_trampoline; - eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->vma[eb->buffer_count] = i915_vma_get(shadow); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - vma->exec_flags = &eb->flags[eb->buffer_count]; + shadow->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->trampoline = trampoline; eb->batch_start_offset = 0; - eb->batch = vma; + eb->batch = shadow; - if (CMDPARSER_USES_GGTT(eb->i915)) - eb->batch_flags |= I915_DISPATCH_SECURE; - - /* eb->batch_len unchanged */ - - vma->private = pool; - return vma; + shadow->private = pool; + return 0; +err_trampoline: + if (trampoline) + i915_vma_unpin(trampoline); +err_shadow: + i915_vma_unpin(shadow); 
err: intel_engine_pool_put(pool); - return vma; + return err; } static void @@ -2134,7 +2162,17 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; - if (i915_gem_context_nopreempt(eb->gem_context)) + if (eb->trampoline) { + GEM_BUG_ON(eb->batch_start_offset); + err = eb->engine->emit_bb_start(eb->request, + eb->trampoline->node.start + + eb->batch_len, + 0, 0); + if (err) + return err; + } + + if (intel_context_nopreempt(eb->context)) eb->request->flags |= I915_REQUEST_NOPREEMPT; return 0; @@ -2220,6 +2258,9 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) if (err) return err; + if (unlikely(intel_context_is_banned(ce))) + return -EIO; + /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for @@ -2515,6 +2556,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.trampoline = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { @@ -2606,15 +2648,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (eb.batch_len == 0) eb.batch_len = eb.batch->size - eb.batch_start_offset; - if (eb_use_cmdparser(&eb)) { - struct i915_vma *vma; - - vma = eb_parse(&eb); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - } + err = eb_parse(&eb); + if (err) + goto err_vma; /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure @@ -2694,6 +2730,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_submit(&eb); err_request: add_to_client(eb.request, file); + i915_request_get(eb.request); i915_request_add(eb.request); if (fences) @@ -2709,6 +2746,7 @@ err_request: fput(out_fence->file); } } + i915_request_put(eb.request); err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) @@ -2718,6 +2756,8 @@ err_batch_unpin: err_vma: if (eb.exec) eb_release_vmas(&eb); + if (eb.trampoline) + i915_vma_unpin(eb.trampoline); mutex_unlock(&dev->struct_mutex); err_engine: eb_unpin_engine(&eb); @@ -2787,8 +2827,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - if (!i915_gem_check_execbuffer(&exec2)) - return -EINVAL; + err = i915_gem_check_execbuffer(&exec2); + if (err) + return err; /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), @@ -2865,8 +2906,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (!i915_gem_check_execbuffer(args)) - return -EINVAL; + err = i915_gem_check_execbuffer(args); + if (err) + return err; /* Allocate an extra slot for use by the command parser */ exec2_list = kvmalloc_array(count + 1, eb_element_size(), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index ddc7f2a52b3e..87d8b27f426d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -28,8 +28,8 @@ int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); int i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_pwrite_ioctl(struct 
drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 0e2bf6b7e143..520cc9cac471 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -79,9 +79,6 @@ __i915_gem_lmem_object_create(struct intel_memory_region *mem, struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; - if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) - return ERR_PTR(-E2BIG); - obj = i915_gem_object_alloc(); if (!obj) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index e3002849844b..879fff8adc48 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -5,6 +5,7 @@ */ #include <linux/mman.h> +#include <linux/pfn_t.h> #include <linux/sizes.h> #include "gt/intel_gt.h" @@ -14,7 +15,9 @@ #include "i915_gem_gtt.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" +#include "i915_gem_mman.h" #include "i915_trace.h" +#include "i915_user_extensions.h" #include "i915_vma.h" static inline bool @@ -144,6 +147,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 3 - Remove implicit set-domain(GTT) and synchronisation on initial * pagefault; swapin remains transparent. * + * 4 - Support multiple fault handlers per object depending on object's + * backing storage (a.k.a. MMAP_OFFSET). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -171,7 +177,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 3; + return 4; } static inline struct i915_ggtt_view @@ -197,29 +203,83 @@ compute_partial_view(const struct drm_i915_gem_object *obj, return view; } -/** - * i915_gem_fault - fault a page into the GTT - * @vmf: fault info - * - * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped - * from userspace. The fault handler takes care of binding the object to - * the GTT (if needed), allocating and programming a fence register (again, - * only if needed based on whether the old reg is still valid or the object - * is tiled) and inserting a new PTE into the faulting process. - * - * Note that the faulting process may involve evicting existing objects - * from the GTT and/or fence registers to make room. So performance may - * suffer if the GTT working set is large or there are few fence registers - * left. - * - * The current feature set supported by i915_gem_fault() and thus GTT mmaps - * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). - */ -vm_fault_t i915_gem_fault(struct vm_fault *vmf) +static vm_fault_t i915_error_to_vmf_fault(int err) +{ + switch (err) { + default: + WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err); + /* fallthrough */ + case -EIO: /* shmemfs failure from swap device */ + case -EFAULT: /* purged object */ + case -ENODEV: /* bad object, how did you get here! */ + return VM_FAULT_SIGBUS; + + case -ENOSPC: /* shmemfs allocation failure */ + case -ENOMEM: /* our allocation failure */ + return VM_FAULT_OOM; + + case 0: + case -EAGAIN: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. 
+ */ + return VM_FAULT_NOPAGE; + } +} + +static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) +{ + struct vm_area_struct *area = vmf->vma; + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + unsigned long i, size = area->vm_end - area->vm_start; + bool write = area->vm_flags & VM_WRITE; + vm_fault_t ret = VM_FAULT_SIGBUS; + int err; + + if (!i915_gem_object_has_struct_page(obj)) + return ret; + + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) + return ret; + + err = i915_gem_object_pin_pages(obj); + if (err) + return i915_error_to_vmf_fault(err); + + /* PTEs are revoked in obj->ops->put_pages() */ + for (i = 0; i < size >> PAGE_SHIFT; i++) { + struct page *page = i915_gem_object_get_page(obj, i); + + ret = vmf_insert_pfn(area, + (unsigned long)area->vm_start + i * PAGE_SIZE, + page_to_pfn(page)); + if (ret != VM_FAULT_NOPAGE) + break; + } + + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + obj->cache_dirty = true; /* XXX flush after PAT update? */ + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + + return ret; +} + +static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; struct drm_device *dev = obj->base.dev; struct drm_i915_private *i915 = to_i915(dev); struct intel_runtime_pm *rpm = &i915->runtime_pm; @@ -312,6 +372,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); mutex_unlock(&i915->ggtt.vm.mutex); + /* Track the mmo associated with the fenced vma */ + vma->mmo = mmo; + if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); @@ -332,67 +395,36 @@ err_rpm: intel_runtime_pm_put(rpm, wakeref); i915_gem_object_unpin_pages(obj); err: - switch (ret) { - default: - WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); - /* fallthrough */ - case -EIO: /* shmemfs failure from swap device */ - case -EFAULT: /* purged object */ - case -ENODEV: /* bad object, how did you get here! */ - return VM_FAULT_SIGBUS; - - case -ENOSPC: /* shmemfs allocation failure */ - case -ENOMEM: /* our allocation failure */ - return VM_FAULT_OOM; - - case 0: - case -EAGAIN: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - /* - * EBUSY is ok: this just means that another thread - * already did the job. - */ - return VM_FAULT_NOPAGE; - } + return i915_error_to_vmf_fault(ret); } -void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; GEM_BUG_ON(!obj->userfault_count); - obj->userfault_count = 0; - list_del(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - for_each_ggtt_vma(vma, obj) - i915_vma_unset_userfault(vma); + i915_vma_revoke_mmap(vma); + + GEM_BUG_ON(obj->userfault_count); } -/** - * i915_gem_object_release_mmap - remove physical page mappings - * @obj: obj in question - * - * Preserve the reservation of the mmapping with the DRM core code, but - * relinquish ownership of the pages back to the system. 
- * +/* * It is vital that we remove the page mapping if we have mapped a tiled * object through the GTT and then lose the fence register due to * resource pressure. Similarly if the object has been moved out of the * aperture, than pages mapped into userspace must be revoked. Removing the * mapping will then trigger a page fault on the next user access, allowing - * fixup by i915_gem_fault(). + * fixup by vm_fault_gtt(). */ -void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); intel_wakeref_t wakeref; - /* Serialisation between user GTT access and our code depends upon + /* + * Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user * pagefault then has to wait until we release the mutex. * @@ -406,9 +438,10 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) if (!obj->userfault_count) goto out; - __i915_gem_object_release_mmap(obj); + __i915_gem_object_release_mmap_gtt(obj); - /* Ensure that the CPU's PTE are revoked and there are not outstanding + /* + * Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB * flushing implied above by changing the PTE above *should* be * sufficient, an extra barrier here just provides us with a bit @@ -422,54 +455,149 @@ out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static int create_mmap_offset(struct drm_i915_gem_object *obj) +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) +{ + struct i915_mmap_offset *mmo; + + spin_lock(&obj->mmo.lock); + list_for_each_entry(mmo, &obj->mmo.offsets, offset) { + /* + * vma_node_unmap for GTT mmaps handled already in + * __i915_gem_object_release_mmap_gtt + */ + if (mmo->mmap_type == I915_MMAP_TYPE_GTT) + continue; + + spin_unlock(&obj->mmo.lock); + drm_vma_node_unmap(&mmo->vma_node, + obj->base.dev->anon_inode->i_mapping); + spin_lock(&obj->mmo.lock); + } + spin_unlock(&obj->mmo.lock); +} + +/** + * i915_gem_object_release_mmap - remove physical page mappings + * @obj: obj in question + * + * Preserve the reservation of the mmapping with the DRM core code, but + * relinquish ownership of the pages back to the system. 
+ */ +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + i915_gem_object_release_mmap_gtt(obj); + i915_gem_object_release_mmap_offset(obj); +} + +static struct i915_mmap_offset * +mmap_offset_attach(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type, + struct drm_file *file) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_gt *gt = &i915->gt; + struct i915_mmap_offset *mmo; int err; - err = drm_gem_create_mmap_offset(&obj->base); + mmo = kmalloc(sizeof(*mmo), GFP_KERNEL); + if (!mmo) + return ERR_PTR(-ENOMEM); + + mmo->obj = obj; + mmo->dev = obj->base.dev; + mmo->file = file; + mmo->mmap_type = mmap_type; + drm_vma_node_reset(&mmo->vma_node); + + err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node, + obj->base.size / PAGE_SIZE); if (likely(!err)) - return 0; + goto out; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); if (err) - return err; + goto err; i915_gem_drain_freed_objects(i915); - return drm_gem_create_mmap_offset(&obj->base); + err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node, + obj->base.size / PAGE_SIZE); + if (err) + goto err; + +out: + if (file) + drm_vma_node_allow(&mmo->vma_node, file); + + spin_lock(&obj->mmo.lock); + list_add(&mmo->offset, &obj->mmo.offsets); + spin_unlock(&obj->mmo.lock); + + return mmo; + +err: + kfree(mmo); + return ERR_PTR(err); } -int -i915_gem_mmap_gtt(struct drm_file *file, - struct drm_device *dev, - u32 handle, - u64 *offset) +static int +__assign_mmap_offset(struct drm_file *file, + u32 handle, + enum i915_mmap_type mmap_type, + u64 *offset) { struct drm_i915_gem_object *obj; - int ret; + struct i915_mmap_offset *mmo; + int err; obj = i915_gem_object_lookup(file, handle); if (!obj) return -ENOENT; - if (i915_gem_object_never_bind_ggtt(obj)) { - ret = -ENODEV; + if (mmap_type == I915_MMAP_TYPE_GTT && + i915_gem_object_never_bind_ggtt(obj)) { + err = -ENODEV; + goto out; + } + + if (mmap_type != I915_MMAP_TYPE_GTT && + !i915_gem_object_has_struct_page(obj)) { + err = -ENODEV; goto out; } - ret = create_mmap_offset(obj); - if (ret == 0) - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); + mmo = mmap_offset_attach(obj, mmap_type, file); + if (IS_ERR(mmo)) { + err = PTR_ERR(mmo); + goto out; + } + *offset = drm_vma_node_offset_addr(&mmo->vma_node); + err = 0; out: i915_gem_object_put(obj); - return ret; + return err; +} + +int +i915_gem_dumb_mmap_offset(struct drm_file *file, + struct drm_device *dev, + u32 handle, + u64 *offset) +{ + enum i915_mmap_type mmap_type; + + if (boot_cpu_has(X86_FEATURE_PAT)) + mmap_type = I915_MMAP_TYPE_WC; + else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) + return -ENODEV; + else + mmap_type = I915_MMAP_TYPE_GTT; + + return __assign_mmap_offset(file, handle, mmap_type, offset); } /** - * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing + * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device * @data: GTT mapping ioctl data * @file: GEM object info @@ -484,12 +612,179 @@ out: * userspace. 
*/ int -i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { - struct drm_i915_gem_mmap_gtt *args = data; + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_mmap_offset *args = data; + enum i915_mmap_type type; + int err; - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); + /* + * Historically we failed to check args.pad and args.offset + * and so we cannot use those fields for user input and we cannot + * add -EINVAL for them as the ABI is fixed, i.e. old userspace + * may be feeding in garbage in those fields. + * + * if (args->pad) return -EINVAL; is verboten! + */ + + err = i915_user_extensions(u64_to_user_ptr(args->extensions), + NULL, 0, NULL); + if (err) + return err; + + switch (args->flags) { + case I915_MMAP_OFFSET_GTT: + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return -ENODEV; + type = I915_MMAP_TYPE_GTT; + break; + + case I915_MMAP_OFFSET_WC: + if (!boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + type = I915_MMAP_TYPE_WC; + break; + + case I915_MMAP_OFFSET_WB: + type = I915_MMAP_TYPE_WB; + break; + + case I915_MMAP_OFFSET_UC: + if (!boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + type = I915_MMAP_TYPE_UC; + break; + + default: + return -EINVAL; + } + + return __assign_mmap_offset(file, args->handle, type, &args->offset); +} + +static void vm_open(struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo = vma->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + + GEM_BUG_ON(!obj); + i915_gem_object_get(obj); +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo = vma->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + + GEM_BUG_ON(!obj); + i915_gem_object_put(obj); +} + +static const struct vm_operations_struct vm_ops_gtt = { + .fault = vm_fault_gtt, + .open = vm_open, + .close = vm_close, +}; + +static const struct vm_operations_struct vm_ops_cpu = { + .fault = vm_fault_cpu, + .open = vm_open, + .close = vm_close, +}; + +/* + * This overcomes the limitation in drm_gem_mmap's assignment of a + * drm_gem_object as the vma->vm_private_data, since we need to + * be able to resolve multiple mmap offsets which could be tied + * to a single gem object. + */ +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_vma_offset_node *node; + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct i915_mmap_offset *mmo = NULL; + struct drm_gem_object *obj = NULL; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + drm_vma_offset_lock_lookup(dev->vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + if (likely(node)) { + mmo = container_of(node, struct i915_mmap_offset, + vma_node); + /* + * In our dependency chain, the drm_vma_offset_node + * depends on the validity of the mmo, which depends on + * the gem object. However, the only reference we have + * at this point is the mmo (as the parent of the node). + * Try to check if the gem object was at least cleared. + */ + if (!mmo || !mmo->obj) { + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + return -EINVAL; + } + /* + * Skip 0-refcnted objects as they are in the process of being + * destroyed and will be invalid when the vma manager lock + * is released.
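+ * The kref_get_unless_zero() below never resurrects such an object;
+ * the lookup simply fails with -EINVAL instead.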
+ */ + obj = &mmo->obj->base; + if (!kref_get_unless_zero(&obj->refcount)) + obj = NULL; + } + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + if (!obj) + return -EINVAL; + + if (!drm_vma_node_is_allowed(node, priv)) { + drm_gem_object_put_unlocked(obj); + return -EACCES; + } + + if (i915_gem_object_is_readonly(to_intel_bo(obj))) { + if (vma->vm_flags & VM_WRITE) { + drm_gem_object_put_unlocked(obj); + return -EINVAL; + } + vma->vm_flags &= ~VM_MAYWRITE; + } + + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_private_data = mmo; + + switch (mmo->mmap_type) { + case I915_MMAP_TYPE_WC: + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_WB: + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_UC: + vma->vm_page_prot = + pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_GTT: + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_gtt; + break; + } + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); + + return 0; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h new file mode 100644 index 000000000000..862e01b7cb69 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_MMAN_H__ +#define __I915_GEM_MMAN_H__ + +#include <linux/mm_types.h> +#include <linux/types.h> + +struct drm_device; +struct drm_file; +struct drm_i915_gem_object; +struct file; +struct i915_mmap_offset; +struct mutex; + +int i915_gem_mmap_gtt_version(void); +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +int i915_gem_dumb_mmap_offset(struct drm_file *file_priv, + struct drm_device *dev, + u32 handle, u64 *offset); + +void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index a50296cce0d8..46bacc82ddc4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -22,11 +22,14 @@ * */ +#include <linux/sched/mm.h> + #include "display/intel_frontbuffer.h" #include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" #include "i915_globals.h" #include "i915_trace.h" @@ -59,6 +62,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->lut_list); + spin_lock_init(&obj->mmo.lock); + INIT_LIST_HEAD(&obj->mmo.offsets); + init_rcu_head(&obj->rcu); obj->ops = ops; @@ -95,6 +101,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; struct i915_lut_handle *lut, *ln; + struct i915_mmap_offset *mmo; LIST_HEAD(close); i915_gem_object_lock(obj); @@ -109,6 +116,17 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) } i915_gem_object_unlock(obj); + spin_lock(&obj->mmo.lock); + list_for_each_entry(mmo, 
&obj->mmo.offsets, offset) { + if (mmo->file != file) + continue; + + spin_unlock(&obj->mmo.lock); + drm_vma_node_revoke(&mmo->vma_node, file); + spin_lock(&obj->mmo.lock); + } + spin_unlock(&obj->mmo.lock); + list_for_each_entry_safe(lut, ln, &close, obj_link) { struct i915_gem_context *ctx = lut->ctx; struct i915_vma *vma; @@ -156,6 +174,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { + struct i915_mmap_offset *mmo, *mn; + trace_i915_gem_object_destroy(obj); if (!list_empty(&obj->vma.list)) { @@ -174,19 +194,28 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(vma->obj != obj); spin_unlock(&obj->vma.lock); - i915_vma_destroy(vma); + __i915_vma_put(vma); spin_lock(&obj->vma.lock); } spin_unlock(&obj->vma.lock); } + i915_gem_object_release_mmap(obj); + + list_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) { + drm_vma_offset_remove(obj->base.dev->vma_offset_manager, + &mmo->vma_node); + kfree(mmo); + } + INIT_LIST_HEAD(&obj->mmo.offsets); + GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(!list_empty(&obj->lut_list)); atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); GEM_BUG_ON(i915_gem_object_has_pages(obj)); bitmap_free(obj->bit_17); @@ -277,18 +306,14 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: - for_each_ggtt_vma(vma, obj) - intel_gt_flush_ggtt_writes(vma->vm->gt); - - intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU); - + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { - if (vma->iomap) - continue; - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } + spin_unlock(&obj->vma.lock); + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); break; case I915_GEM_DOMAIN_WC: @@ -308,6 +333,30 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, obj->write_domain = 0; } +void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) { + intel_frontbuffer_flush(front, origin); + intel_frontbuffer_put(front); + } +} + +void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) { + intel_frontbuffer_invalidate(front, origin); + intel_frontbuffer_put(front); + } +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 458cd51331f1..858f8bf49a04 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -13,8 +13,8 @@ #include <drm/i915_drm.h> +#include "display/intel_frontbuffer.h" #include "i915_gem_object_types.h" - #include "i915_gem_gtt.h" void i915_gem_init__objects(struct drm_i915_private *i915); @@ -132,13 +132,13 @@ void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, static inline void i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) { - obj->base.vma_node.readonly = true; + obj->flags |= I915_BO_READONLY; } static inline bool 
i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) { - return obj->base.vma_node.readonly; + return obj->flags & I915_BO_READONLY; } static inline bool @@ -271,10 +271,27 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ + I915_MM_NORMAL = 0, + /* + * Only used by struct_mutex, when called "recursively" from + * a direct-reclaim-esque path. Safe because there is only ever one + * struct_mutex in the entire system. + */ + I915_MM_SHRINKER = 1, + /* + * Used for obj->mm.lock when allocating pages. Safe because the object + * isn't yet on any LRU, and therefore the shrinker can't deadlock on + * it. As soon as the object has pages, obj->mm.lock nests within + * fs_reclaim. + */ + I915_MM_GET_PAGES = 1, +}; + static inline int __must_check i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - might_lock(&obj->mm.lock); + might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (atomic_inc_not_zero(&obj->mm.pages_pin_count)) return 0; @@ -317,13 +334,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) __i915_gem_object_unpin_pages(obj); } -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ - I915_MM_NORMAL = 0, - I915_MM_SHRINKER /* called "recursively" from a direct-reclaim-esque path */ -}; - -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass); +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); @@ -376,9 +387,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); -void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); - void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains); @@ -463,4 +471,25 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr); +void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); +void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); + +static inline void +i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (unlikely(rcu_access_pointer(obj->frontbuffer))) + __i915_gem_object_flush_frontbuffer(obj, origin); +} + +static inline void +i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (unlikely(rcu_access_pointer(obj->frontbuffer))) + __i915_gem_object_invalidate_frontbuffer(obj, origin); +} + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 96008374a412..88e268633fdc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -63,6 +63,23 @@ struct drm_i915_gem_object_ops { void (*release)(struct drm_i915_gem_object *obj); }; +enum i915_mmap_type { + I915_MMAP_TYPE_GTT = 0, + I915_MMAP_TYPE_WC, + I915_MMAP_TYPE_WB, + I915_MMAP_TYPE_UC, +}; + +struct i915_mmap_offset { + struct drm_device *dev; + struct
drm_vma_offset_node vma_node; + struct drm_i915_gem_object *obj; + struct drm_file *file; + enum i915_mmap_type mmap_type; + + struct list_head offset; +}; + struct drm_i915_gem_object { struct drm_gem_object base; @@ -118,12 +135,18 @@ struct drm_i915_gem_object { unsigned int userfault_count; struct list_head userfault_link; + struct { + spinlock_t lock; /* Protects access to mmo offsets */ + struct list_head offsets; + } mmo; + I915_SELFTEST_DECLARE(struct list_head st_link); unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) +#define I915_BO_READONLY BIT(2) /* * Is the object to be mapped as read-only to the GPU @@ -150,7 +173,7 @@ struct drm_i915_gem_object { */ u16 write_domain; - struct intel_frontbuffer *frontbuffer; + struct intel_frontbuffer __rcu *frontbuffer; /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; @@ -162,7 +185,11 @@ struct drm_i915_gem_object { atomic_t bind_count; struct { - struct mutex lock; /* protects the pages and their use */ + /* + * Protects the pages and their use. Do not use directly, but + * instead go through the pin/unpin interfaces. + */ + struct mutex lock; atomic_t pages_pin_count; atomic_t shrink_pin; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 29f4c2850745..75197ca696a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -8,6 +8,7 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" #include "i915_gem_lmem.h" +#include "i915_gem_mman.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -106,7 +107,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { int err; - err = mutex_lock_interruptible(&obj->mm.lock); + err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (err) return err; @@ -190,8 +191,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) return pages; } -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -202,12 +202,14 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, GEM_BUG_ON(atomic_read(&obj->bind_count)); /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock_nested(&obj->mm.lock, subclass); + mutex_lock(&obj->mm.lock); if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { err = -EBUSY; goto unlock; } + i915_gem_object_release_mmap_offset(obj); + /* * ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -308,7 +310,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, if (!i915_gem_object_type_has(obj, flags)) return ERR_PTR(-ENXIO); - err = mutex_lock_interruptible(&obj->mm.lock); + err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (err) return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 8043ff63d73f..b1b7c1b3038a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -164,7 +164,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) if (err) return err; - 
mutex_lock(&obj->mm.lock); + mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (obj->mm.madv != I915_MADV_WILLNEED) { err = -EFAULT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index f88ee1317bb4..c8264eb036bf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,7 +13,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); @@ -99,30 +99,12 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); - - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - if (intel_gt_init_hw(&i915->gt)) - goto err_wedged; + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - if (intel_gt_resume(&i915->gt)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - return; - -err_wedged: - if (!intel_gt_is_wedged(&i915->gt)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - intel_gt_set_wedged(&i915->gt); - } - goto out_unlock; + intel_gt_resume(&i915->gt); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 2f7bcfb9c964..d50adac12249 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -85,7 +85,7 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) } prev_end = offset + block_size; - }; + } sg_page_sizes |= sg->length; sg_mark_end(sg); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f2418a1cfe68..f7e4b39c734f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -57,7 +57,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, flags = I915_GEM_OBJECT_UNBIND_ACTIVE; if (i915_gem_object_unbind(obj, flags) == 0) - __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + __i915_gem_object_put_pages(obj); return !i915_gem_object_has_pages(obj); } @@ -209,8 +209,7 @@ i915_gem_shrink(struct drm_i915_private *i915, if (unsafe_drop_pages(obj, shrink)) { /* May arrive from get_pages on another bo */ - mutex_lock_nested(&obj->mm.lock, - I915_MM_SHRINKER); + mutex_lock(&obj->mm.lock); if (!i915_gem_object_has_pages(obj)) { try_to_writeback(obj, shrink); count += obj->base.size >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index a2d49c04e6a4..afb08a1704a2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -26,48 +26,49 @@ * for is a boon. 
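 *
 * The allocator below is a plain drm_mm range manager over the stolen
 * region, with all insertions and removals serialised by
 * i915->mm.stolen_lock.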
*/ -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment, u64 start, u64 end) { int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return -ENODEV; /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + if (INTEL_GEN(i915) >= 8 && start < 4096) start = 4096; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_insert_node_in_range(&i915->mm.stolen, node, size, alignment, 0, start, end, DRM_MM_INSERT_BEST); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_unlock(&i915->mm.stolen_lock); return ret; } -int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, +int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment) { - return i915_gem_stolen_insert_node_in_range(dev_priv, node, size, + return i915_gem_stolen_insert_node_in_range(i915, node, size, alignment, 0, U64_MAX); } -void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, +void i915_gem_stolen_remove_node(struct drm_i915_private *i915, struct drm_mm_node *node) { - mutex_lock(&dev_priv->mm.stolen_lock); + mutex_lock(&i915->mm.stolen_lock); drm_mm_remove_node(node); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_unlock(&i915->mm.stolen_lock); } -static int i915_adjust_stolen(struct drm_i915_private *dev_priv, +static int i915_adjust_stolen(struct drm_i915_private *i915, struct resource *dsm) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; struct resource *r; if (dsm->start == 0 || dsm->end <= dsm->start) @@ -79,14 +80,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, */ /* Make sure we don't clobber the GTT if it's within stolen memory */ - if (INTEL_GEN(dev_priv) <= 4 && - !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { + if (INTEL_GEN(i915) <= 4 && + !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; struct resource ggtt_res; resource_size_t ggtt_start; - ggtt_start = I915_READ(PGTBL_CTL); - if (IS_GEN(dev_priv, 4)) + ggtt_start = intel_uncore_read(uncore, PGTBL_CTL); + if (IS_GEN(i915, 4)) ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) | (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else @@ -120,7 +121,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, * kernel. So if the region is already marked as busy, something * is seriously wrong. */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, + r = devm_request_mem_region(i915->drm.dev, dsm->start, resource_size(dsm), "Graphics Stolen Memory"); if (r == NULL) { @@ -133,14 +134,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, * reservation starting from 1 instead of 0. * There's also BIOS with off-by-one on the other end. */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, + r = devm_request_mem_region(i915->drm.dev, dsm->start + 1, resource_size(dsm) - 2, "Graphics Stolen Memory"); /* * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. 
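 * We therefore only report a conflicting reservation as an error on
 * non-GEN3 below.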
*/ - if (r == NULL && !IS_GEN(dev_priv, 3)) { + if (!r && !IS_GEN(i915, 3)) { DRM_ERROR("conflict detected with stolen region: %pR\n", dsm); @@ -151,25 +152,27 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, return 0; } -static void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +static void i915_gem_cleanup_stolen(struct drm_i915_private *i915) { - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return; - drm_mm_takedown(&dev_priv->mm.stolen); + drm_mm_takedown(&i915->mm.stolen); } -static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void g4x_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, + IS_GM45(i915) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", - IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + IS_GM45(i915) ? "CTG" : "ELK", reg_val); if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) return; @@ -178,7 +181,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, * Whether ILK really reuses the ELK register for this is unclear. * Let's see if we catch anyone with this supposedly enabled on ILK. */ - WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", + WARN(IS_GEN(i915, 5), "ILK stolen reserved found? 0x%08x\n", reg_val); if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) @@ -190,11 +193,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, *size = stolen_top - *base; } -static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void gen6_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -222,12 +226,13 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void vlv_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -250,11 +255,12 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, *base = stolen_top - *size; } -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void gen7_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -276,11 +282,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void chv_get_stolen_reserved(struct 
drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -308,12 +315,13 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void bdw_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -328,10 +336,11 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void icl_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED); + u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); @@ -356,22 +365,23 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -static int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +static int i915_gem_init_stolen(struct drm_i915_private *i915) { + struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - mutex_init(&dev_priv->mm.stolen_lock); + mutex_init(&i915->mm.stolen_lock); - if (intel_vgpu_active(dev_priv)) { - dev_notice(dev_priv->drm.dev, + if (intel_vgpu_active(i915)) { + dev_notice(i915->drm.dev, "%s, disabling use of stolen memory\n", "iGVT-g active"); return 0; } - if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { - dev_notice(dev_priv->drm.dev, + if (intel_vtd_active() && INTEL_GEN(i915) < 8) { + dev_notice(i915->drm.dev, "%s, disabling use of stolen memory\n", "DMAR active"); return 0; @@ -380,58 +390,59 @@ static int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (resource_size(&intel_graphics_stolen_res) == 0) return 0; - dev_priv->dsm = intel_graphics_stolen_res; + i915->dsm = intel_graphics_stolen_res; - if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) + if (i915_adjust_stolen(i915, &i915->dsm)) return 0; - GEM_BUG_ON(dev_priv->dsm.start == 0); - GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + GEM_BUG_ON(i915->dsm.start == 0); + GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); - stolen_top = dev_priv->dsm.end + 1; + stolen_top = i915->dsm.end + 1; reserved_base = stolen_top; reserved_size = 0; - switch (INTEL_GEN(dev_priv)) { + switch (INTEL_GEN(i915)) { case 2: case 3: break; case 4: - if (!IS_G4X(dev_priv)) + if (!IS_G4X(i915)) break; /* fall through */ case 5: - g4x_get_stolen_reserved(dev_priv, + g4x_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 6: - gen6_get_stolen_reserved(dev_priv, + gen6_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 7: - if (IS_VALLEYVIEW(dev_priv)) - vlv_get_stolen_reserved(dev_priv, + if (IS_VALLEYVIEW(i915)) + vlv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else - gen7_get_stolen_reserved(dev_priv, + gen7_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 8: 
case 9: case 10: - if (IS_LP(dev_priv)) - chv_get_stolen_reserved(dev_priv, + if (IS_LP(i915)) + chv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else - bdw_get_stolen_reserved(dev_priv, + bdw_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; default: - MISSING_CASE(INTEL_GEN(dev_priv)); + MISSING_CASE(INTEL_GEN(i915)); /* fall-through */ case 11: case 12: - icl_get_stolen_reserved(dev_priv, &reserved_base, + icl_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); break; } @@ -448,12 +459,12 @@ static int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_size = 0; } - dev_priv->dsm_reserved = - (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + i915->dsm_reserved = + (struct resource)DEFINE_RES_MEM(reserved_base, reserved_size); - if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + if (!resource_contains(&i915->dsm, &i915->dsm_reserved)) { DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", - &dev_priv->dsm_reserved, &dev_priv->dsm); + &i915->dsm_reserved, &i915->dsm); return 0; } @@ -462,14 +473,14 @@ static int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_total = stolen_top - reserved_base; DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); + (u64)resource_size(&i915->dsm) >> 10, + ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total; + i915->stolen_usable_size = + resource_size(&i915->dsm) - reserved_total; /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); + drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); return 0; } @@ -478,11 +489,11 @@ static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, resource_size_t offset, resource_size_t size) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); + GEM_BUG_ON(range_overflows(offset, size, resource_size(&i915->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -502,7 +513,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, sg->offset = 0; sg->length = size; - sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; + sg_dma_address(sg) = (dma_addr_t)i915->dsm.start + offset; sg_dma_len(sg) = size; return st; @@ -533,16 +544,15 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, static void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); GEM_BUG_ON(!stolen); - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); + i915_gem_object_release_memory_region(obj); - if (obj->mm.region) - i915_gem_object_release_memory_region(obj); + i915_gem_stolen_remove_node(i915, stolen); + kfree(stolen); } static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { @@ -552,9 +562,8 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { }; static struct 
drm_i915_gem_object * -__i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen, - struct intel_memory_region *mem) +__i915_gem_object_create_stolen(struct intel_memory_region *mem, + struct drm_mm_node *stolen) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; @@ -565,20 +574,19 @@ __i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, if (!obj) goto err; - drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); + drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; - cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; + cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); err = i915_gem_object_pin_pages(obj); if (err) goto cleanup; - if (mem) - i915_gem_object_init_memory_region(obj, mem, 0); + i915_gem_object_init_memory_region(obj, mem, 0); return obj; @@ -593,12 +601,12 @@ _i915_gem_object_create_stolen(struct intel_memory_region *mem, resource_size_t size, unsigned int flags) { - struct drm_i915_private *dev_priv = mem->i915; + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return ERR_PTR(-ENODEV); if (size == 0) @@ -608,30 +616,30 @@ _i915_gem_object_create_stolen(struct intel_memory_region *mem, if (!stolen) return ERR_PTR(-ENOMEM); - ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); + ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096); if (ret) { obj = ERR_PTR(ret); goto err_free; } - obj = __i915_gem_object_create_stolen(dev_priv, stolen, mem); + obj = __i915_gem_object_create_stolen(mem, stolen); if (IS_ERR(obj)) goto err_remove; return obj; err_remove: - i915_gem_stolen_remove_node(dev_priv, stolen); + i915_gem_stolen_remove_node(i915, stolen); err_free: kfree(stolen); return obj; } struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, +i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(dev_priv->mm.regions[INTEL_REGION_STOLEN], + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN], size, I915_BO_ALLOC_CONTIGUOUS); } @@ -665,18 +673,19 @@ struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) } struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, +i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, resource_size_t gtt_offset, resource_size_t size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN]; + struct i915_ggtt *ggtt = &i915->ggtt; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; struct i915_vma *vma; int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return ERR_PTR(-ENODEV); DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", @@ -694,19 +703,19 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv stolen->start = stolen_offset; stolen->size = size; - 
mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); + mutex_unlock(&i915->mm.stolen_lock); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); return ERR_PTR(ret); } - obj = __i915_gem_object_create_stolen(dev_priv, stolen, NULL); + obj = __i915_gem_object_create_stolen(mem, stolen); if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); - i915_gem_stolen_remove_node(dev_priv, stolen); + i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); return obj; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 1fa592d82af5..6c7825a2dc2a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "i915_gem.h" #include "i915_gem_ioctls.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" /** diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 4c72d74d6576..e5558af111e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,9 +129,10 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, spin_unlock(&mn->lock); ret = i915_gem_object_unbind(obj, - I915_GEM_OBJECT_UNBIND_ACTIVE); + I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_BARRIER); if (ret == 0) - ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + ret = __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); if (ret) return ret; @@ -459,31 +460,36 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; unsigned int flags = 0; + int locked = 0; if (!i915_gem_object_is_readonly(obj)) flags |= FOLL_WRITE; ret = -EFAULT; if (mmget_not_zero(mm)) { - down_read(&mm->mmap_sem); while (pinned < npages) { + if (!locked) { + down_read(&mm->mmap_sem); + locked = 1; + } ret = get_user_pages_remote (work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, flags, - pvec + pinned, NULL, NULL); + pvec + pinned, NULL, &locked); if (ret < 0) break; pinned += ret; } - up_read(&mm->mmap_sem); + if (locked) + up_read(&mm->mmap_sem); mmput(mm); } } - mutex_lock(&obj->mm.lock); + mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (obj->userptr.work == &work->work) { struct sg_table *pages = ERR_PTR(ret); @@ -773,15 +779,11 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -EFAULT; if (args->flags & I915_USERPTR_READ_ONLY) { - struct i915_address_space *vm; - /* * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. 
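 * Only allow I915_USERPTR_READ_ONLY when the GT's address space
 * advertises per-page read-only support (vm->has_read_only).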
*/ - vm = rcu_dereference_protected(dev_priv->kernel_context->vm, - true); /* static vm */ - if (!vm || !vm->has_read_only) + if (!dev_priv->gt.vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 892d12db6c49..fa16f2c3f3ac 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -12,10 +12,14 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { unsigned long nreal = obj->scratch / PAGE_SIZE; - struct scatterlist *sg; + struct sgt_iter sgt_iter; + struct page *page; - for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) - __free_page(sg_page(sg)); + for_each_sgt_page(page, sgt_iter, pages) { + __free_page(page); + if (!--nreal) + break; + } sg_free_table(pages); kfree(pages); @@ -70,7 +74,6 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) err: huge_free_pages(obj, pages); - return -ENOMEM; #undef GFP } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 688c49a24f32..2479395c1873 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -517,7 +517,7 @@ static int igt_mock_memory_region_huge_pages(void *arg) i915_vma_unpin(vma); i915_vma_close(vma); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -650,7 +650,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) i915_vma_close(vma); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } @@ -678,7 +678,7 @@ static void close_object_list(struct list_head *objects, list_del(&obj->st_link); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -948,7 +948,7 @@ static int igt_mock_ppgtt_64K(void *arg) i915_vma_close(vma); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -1110,8 +1110,7 @@ static int __igt_write_huge(struct intel_context *ce, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_destroy(vma); - + __i915_vma_put(vma); return err; } @@ -1301,7 +1300,7 @@ static int igt_ppgtt_exhaust_huge(void *arg) } i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -1420,7 +1419,7 @@ try_again: err = i915_gem_object_pin_pages(obj); if (err) { - if (err == -ENXIO) { + if (err == -ENXIO || err == -E2BIG) { i915_gem_object_put(obj); size >>= 1; goto try_again; @@ -1442,7 +1441,7 @@ try_again: } out_unpin: i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); out_put: i915_gem_object_put(obj); @@ -1530,7 +1529,7 @@ static int igt_ppgtt_sanity_check(void *arg) err = igt_write_huge(ctx, obj); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); if (err) { @@ -1912,9 +1911,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), }; - struct drm_file *file; struct i915_gem_context 
*ctx; struct i915_address_space *vm; + struct file *file; int err; if (!HAS_PPGTT(i915)) { @@ -1944,6 +1943,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, ctx); out_file: - mock_file_free(i915, file); + fput(file); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index da8edee4fe0a..b972be165e85 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -24,6 +24,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine) prandom_seed_state(&prng, i915_selftest.random_seed); + intel_engine_pm_get(engine); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); @@ -99,6 +100,7 @@ err_put: err_flush: if (err == -ENOMEM) err = 0; + intel_engine_pm_put(engine); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 2b29f6b4e1dd..49edc51111d5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -6,6 +6,7 @@ #include <linux/prime_numbers.h> +#include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" @@ -200,7 +201,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_create(ctx->engine->kernel_context); + rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -326,6 +327,7 @@ static int igt_gem_coherency(void *arg) ctx.engine = random_engine(i915, &prng); GEM_BUG_ON(!ctx.engine); pr_info("%s: using %s\n", __func__, ctx.engine->name); + intel_engine_pm_get(ctx.engine); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) @@ -404,6 +406,7 @@ static int igt_gem_coherency(void *arg) } } free: + intel_engine_pm_put(ctx.engine); kfree(offsets); return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 62fabc023a83..7fc46861a54d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" @@ -26,6 +27,12 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) +static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) +{ + /* single threaded, private ctx */ + return rcu_dereference_protected(ctx->vm, true); +} + static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -33,7 +40,7 @@ static int live_nop_switch(void *arg) struct intel_engine_cs *engine; struct i915_gem_context **ctx; struct igt_live_test t; - struct drm_file *file; + struct file *file; unsigned long n; int err = -ENODEV; @@ -67,25 +74,34 @@ static int live_nop_switch(void *arg) } for_each_uabi_engine(engine, i915) { - struct i915_request *rq; + struct i915_request *rq = NULL; unsigned long end_time, prime; ktime_t times[2] = {}; times[0] = ktime_get_raw(); for (n = 0; n < nctx; n++) { - rq = igt_request_alloc(ctx[n], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); + struct i915_request *this; + + this 
= igt_request_alloc(ctx[n], engine); + if (IS_ERR(this)) { + err = PTR_ERR(this); goto out_file; } - i915_request_add(rq); + if (rq) { + i915_request_await_dma_fence(this, &rq->fence); + i915_request_put(rq); + } + rq = i915_request_get(this); + i915_request_add(this); } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Failed to populate %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); + i915_request_put(rq); err = -EIO; goto out_file; } + i915_request_put(rq); times[1] = ktime_get_raw(); @@ -100,13 +116,21 @@ static int live_nop_switch(void *arg) for_each_prime_number_from(prime, 2, 8192) { times[1] = ktime_get_raw(); + rq = NULL; for (n = 0; n < prime; n++) { - rq = igt_request_alloc(ctx[n % nctx], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); + struct i915_request *this; + + this = igt_request_alloc(ctx[n % nctx], engine); + if (IS_ERR(this)) { + err = PTR_ERR(this); goto out_file; } + if (rq) { /* Force submission order */ + i915_request_await_dma_fence(this, &rq->fence); + i915_request_put(rq); + } + /* * This space is left intentionally blank. * @@ -121,14 +145,18 @@ static int live_nop_switch(void *arg) * for latency. */ - i915_request_add(rq); + rq = i915_request_get(this); + i915_request_add(this); } + GEM_BUG_ON(!rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); intel_gt_set_wedged(&i915->gt); + i915_request_put(rq); break; } + i915_request_put(rq); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 2) @@ -149,7 +177,7 @@ static int live_nop_switch(void *arg) } out_file: - mock_file_free(i915, file); + fput(file); return err; } @@ -255,7 +283,7 @@ static int live_parallel_switch(void *arg) int (* const *fn)(void *arg); struct i915_gem_context *ctx; struct intel_context *ce; - struct drm_file *file; + struct file *file; int n, m, count; int err = 0; @@ -309,7 +337,7 @@ static int live_parallel_switch(void *arg) if (!data[m].ce[0]) continue; - ce = intel_context_create(ctx, data[m].ce[0]->engine); + ce = intel_context_create(data[m].ce[0]->engine); if (IS_ERR(ce)) goto out; @@ -377,7 +405,7 @@ out: } kfree(data); out_file: - mock_file_free(i915, file); + fput(file); return err; } @@ -502,17 +530,17 @@ out_unmap: return err; } -static int file_add_object(struct drm_file *file, - struct drm_i915_gem_object *obj) +static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) { int err; GEM_BUG_ON(obj->base.handle_count); /* tie the object to the drm_file for easy reaping */ - err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + err = idr_alloc(&to_drm_file(file)->object_idr, + &obj->base, 1, 0, GFP_KERNEL); if (err < 0) - return err; + return err; i915_gem_object_get(obj); obj->base.handle_count++; @@ -521,7 +549,7 @@ static int file_add_object(struct drm_file *file, static struct drm_i915_gem_object * create_test_object(struct i915_address_space *vm, - struct drm_file *file, + struct file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; @@ -621,9 +649,9 @@ static int igt_ctx_exec(void *arg) unsigned long ncontexts, ndwords, dw; struct i915_request *tq[5] = {}; struct igt_live_test t; - struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); + struct file *file; if (!intel_engine_can_store_dword(engine)) continue; @@ -716,7 +744,7 @@ out_file: if (igt_live_test_end(&t)) err = -EIO; - mock_file_free(i915, file); + fput(file); if (err) return err; @@ -733,7 +761,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *parent;
struct intel_engine_cs *engine; struct igt_live_test t; - struct drm_file *file; + struct file *file; int err = 0; /* @@ -786,14 +814,15 @@ static int igt_shared_ctx_exec(void *arg) } mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, parent->vm); + __assign_ppgtt(ctx, ctx_vm(parent)); mutex_unlock(&ctx->mutex); ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(parent->vm, file, &objects); + obj = create_test_object(ctx_vm(parent), + file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); intel_context_put(ce); @@ -854,7 +883,7 @@ out_test: if (igt_live_test_end(&t)) err = -EIO; out_file: - mock_file_free(i915, file); + fput(file); return err; } @@ -1140,8 +1169,7 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = intel_gt_wait_for_idle(ce->engine->gt, - MAX_SCHEDULE_TIMEOUT); + ret = igt_flush_test(ce->engine->i915); if (ret) return ret; @@ -1163,9 +1191,11 @@ __sseu_test(const char *name, struct igt_spinner *spin = NULL; int ret; + intel_engine_pm_get(ce->engine); + ret = __sseu_prepare(name, flags, ce, &spin); if (ret) - return ret; + goto out_pm; ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) @@ -1180,6 +1210,8 @@ out_spin: igt_spinner_fini(spin); kfree(spin); } +out_pm: + intel_engine_pm_put(ce->engine); return ret; } @@ -1232,8 +1264,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, hweight32(engine->sseu.slice_mask), hweight32(pg_sseu.slice_mask)); - ce = intel_context_create(engine->kernel_context->gem_context, - engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); goto out_put; @@ -1311,16 +1342,18 @@ static int igt_ctx_sseu(void *arg) static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; + unsigned long idx, ndwords, dw, num_engines; struct drm_i915_gem_object *obj = NULL; struct i915_request *tq[5] = {}; + struct i915_gem_engines_iter it; struct i915_address_space *vm; struct i915_gem_context *ctx; - unsigned long idx, ndwords, dw; + struct intel_context *ce; struct igt_live_test t; - struct drm_file *file; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); LIST_HEAD(objects); + struct file *file; int err = -ENODEV; /* @@ -1343,21 +1376,21 @@ static int igt_ctx_readonly(void *arg) goto out_file; } - rcu_read_lock(); - vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm; + vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { - rcu_read_unlock(); err = 0; goto out_file; } - rcu_read_unlock(); + + num_engines = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + if (intel_engine_can_store_dword(ce->engine)) + num_engines++; + i915_gem_context_unlock_engines(ctx); ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct i915_gem_engines_iter it; - struct intel_context *ce; - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { if (!intel_engine_can_store_dword(ce->engine)) @@ -1380,7 +1413,7 @@ static int igt_ctx_readonly(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), ce->engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(!!ctx_vm(ctx)), err); i915_gem_context_unlock_engines(ctx); goto out_file; @@ -1400,8 +1433,8 @@ static int igt_ctx_readonly(void *arg) } i915_gem_context_unlock_engines(ctx); } - pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_engines); + pr_info("Submitted %lu dwords (across %lu engines)\n", + ndwords, num_engines); dw = 0; idx = 0; @@ -1426,7 +1459,7 @@ out_file: if (igt_live_test_end(&t)) err = -EIO; - mock_file_free(i915, file); + fput(file); return err; } @@ -1466,7 +1499,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out; } *cmd++ = MI_STORE_DWORD_IMM_GEN4; @@ -1488,12 +1521,12 @@ static int write_to_scratch(struct i915_gem_context *ctx, vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err_vm; + goto out_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err_vm; + goto out_vm; err = check_scratch(vm, offset); if (err) @@ -1517,22 +1550,20 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; - i915_vma_unpin_and_release(&vma, 0); + i915_vma_unpin(vma); i915_request_add(rq); - i915_vm_put(vm); - return 0; - + goto out_vm; skip_request: i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); -err_vm: +out_vm: i915_vm_put(vm); -err: +out: i915_gem_object_put(obj); return err; } @@ -1560,7 +1591,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1592,12 +1623,12 @@ static int read_from_scratch(struct i915_gem_context *ctx, vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err_vm; + goto out_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err_vm; + goto out_vm; err = check_scratch(vm, offset); if (err) @@ -1630,29 +1661,27 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err_vm; + goto out_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err_vm; + goto out_vm; } *value = cmd[result / sizeof(*cmd)]; i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - return 0; + goto out_vm; skip_request: i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); -err_vm: +out_vm: i915_vm_put(vm); -err: +out: i915_gem_object_put(obj); return err; } @@ -1661,11 +1690,11 @@ static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; + unsigned long num_engines, count; struct intel_engine_cs *engine; struct igt_live_test t; - struct drm_file *file; I915_RND_STATE(prng); - unsigned long count; + struct file *file; u64 vm_total; int err; @@ -1698,14 +1727,15 @@ static int igt_vm_isolation(void *arg) } /* We can only test vm isolation, if the vm are distinct */ - if (ctx_a->vm == ctx_b->vm) + if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) goto out_file; - vm_total = ctx_a->vm->total; - GEM_BUG_ON(ctx_b->vm->total != vm_total); + vm_total = ctx_vm(ctx_a)->total; + GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); 
vm_total -= I915_GTT_PAGE_SIZE; count = 0; + num_engines = 0; for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); unsigned long this = 0; @@ -1743,14 +1773,15 @@ static int igt_vm_isolation(void *arg) this++; } count += this; + num_engines++; } - pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_engines); + pr_info("Checked %lu scratch offsets across %lu engines\n", + count, num_engines); out_file: if (igt_live_test_end(&t)) err = -EIO; - mock_file_free(i915, file); + fput(file); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 29b2077b73d2..cbf796da64e3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -6,12 +6,14 @@ #include <linux/prime_numbers.h> +#include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" +#include "selftests/igt_mmap.h" struct tile { unsigned int width; @@ -161,7 +163,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, kunmap(p); out: - i915_vma_destroy(vma); + __i915_vma_put(vma); return err; } @@ -255,7 +257,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, if (err) return err; - i915_vma_destroy(vma); + __i915_vma_put(vma); if (igt_timeout(end_time, "%s: timed out after tiling=%d stride=%d\n", @@ -535,7 +537,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -563,16 +565,16 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, int expected) { struct drm_i915_gem_object *obj; - int err; + struct i915_mmap_offset *mmo; obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return PTR_ERR(obj); - err = create_mmap_offset(obj); + mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); i915_gem_object_put(obj); - return err == expected; + return PTR_ERR_OR_ZERO(mmo) == expected; } static void disable_retire_worker(struct drm_i915_private *i915) @@ -606,28 +608,50 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_i915_private *i915 = arg; struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; struct drm_i915_gem_object *obj; - struct drm_mm_node resv, *hole; - u64 hole_start, hole_end; - int loop, err; + struct drm_mm_node *hole, *next; + struct i915_mmap_offset *mmo; + int loop, err = 0; /* Disable background reaper */ disable_retire_worker(i915); GEM_BUG_ON(!i915->gt.awake); + intel_gt_retire_requests(&i915->gt); + i915_gem_drain_freed_objects(i915); /* Trim the device mmap space to only a page */ - memset(&resv, 0, sizeof(resv)); - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { - resv.start = hole_start; - resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ - mmap_offset_lock(i915); - err = drm_mm_reserve_node(mm, &resv); - mmap_offset_unlock(i915); + mmap_offset_lock(i915); + loop = 1; /* PAGE_SIZE units */ + list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) { + struct drm_mm_node *resv; + + resv = kzalloc(sizeof(*resv), GFP_NOWAIT); + if (!resv) { + err = -ENOMEM; + goto out_park; + } + + resv->start = drm_mm_hole_node_start(hole) + loop; + resv->size = hole->hole_size - loop; + 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 29b2077b73d2..cbf796da64e3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -6,12 +6,14 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "huge_gem_object.h"
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_mmap.h"
 
 struct tile {
 	unsigned int width;
@@ -161,7 +163,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 	kunmap(p);
 
 out:
-	i915_vma_destroy(vma);
+	__i915_vma_put(vma);
 	return err;
 }
 
@@ -255,7 +257,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
 		if (err)
 			return err;
 
-		i915_vma_destroy(vma);
+		__i915_vma_put(vma);
 
 		if (igt_timeout(end_time,
 				"%s: timed out after tiling=%d stride=%d\n",
@@ -535,7 +537,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		if (err)
 			return err;
 
-		rq = i915_request_create(engine->kernel_context);
+		rq = intel_engine_create_kernel_request(engine);
 		if (IS_ERR(rq)) {
 			i915_vma_unpin(vma);
 			return PTR_ERR(rq);
@@ -563,16 +565,16 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 			       int expected)
 {
 	struct drm_i915_gem_object *obj;
-	int err;
+	struct i915_mmap_offset *mmo;
 
 	obj = i915_gem_object_create_internal(i915, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	err = create_mmap_offset(obj);
+	mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
 	i915_gem_object_put(obj);
 
-	return err == expected;
+	return PTR_ERR_OR_ZERO(mmo) == expected;
 }
 
 static void disable_retire_worker(struct drm_i915_private *i915)
@@ -606,28 +608,50 @@ static int igt_mmap_offset_exhaustion(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
 	struct drm_i915_gem_object *obj;
-	struct drm_mm_node resv, *hole;
-	u64 hole_start, hole_end;
-	int loop, err;
+	struct drm_mm_node *hole, *next;
+	struct i915_mmap_offset *mmo;
+	int loop, err = 0;
 
 	/* Disable background reaper */
 	disable_retire_worker(i915);
 	GEM_BUG_ON(!i915->gt.awake);
+	intel_gt_retire_requests(&i915->gt);
+	i915_gem_drain_freed_objects(i915);
 
 	/* Trim the device mmap space to only a page */
-	memset(&resv, 0, sizeof(resv));
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		resv.start = hole_start;
-		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
-		mmap_offset_lock(i915);
-		err = drm_mm_reserve_node(mm, &resv);
-		mmap_offset_unlock(i915);
+	mmap_offset_lock(i915);
+	loop = 1; /* PAGE_SIZE units */
+	list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) {
+		struct drm_mm_node *resv;
+
+		resv = kzalloc(sizeof(*resv), GFP_NOWAIT);
+		if (!resv) {
+			err = -ENOMEM;
+			goto out_park;
+		}
+
+		resv->start = drm_mm_hole_node_start(hole) + loop;
+		resv->size = hole->hole_size - loop;
+		resv->color = -1ul;
+		loop = 0;
+
+		if (!resv->size) {
+			kfree(resv);
+			continue;
+		}
+
+		pr_debug("Reserving hole [%llx + %llx]\n",
+			 resv->start, resv->size);
+
+		err = drm_mm_reserve_node(mm, resv);
 		if (err) {
 			pr_err("Failed to trim VMA manager, err=%d\n", err);
+			kfree(resv);
 			goto out_park;
 		}
-		break;
 	}
+	GEM_BUG_ON(!list_is_singular(&mm->hole_stack));
+	mmap_offset_unlock(i915);
 
 	/* Just fits! */
 	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
@@ -650,9 +674,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		goto out;
 	}
 
-	err = create_mmap_offset(obj);
-	if (err) {
+	mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
+	if (IS_ERR(mmo)) {
 		pr_err("Unable to insert object into reclaimed hole\n");
+		err = PTR_ERR(mmo);
 		goto err_obj;
 	}
 
@@ -684,9 +709,15 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 out:
 	mmap_offset_lock(i915);
-	drm_mm_remove_node(&resv);
-	mmap_offset_unlock(i915);
 out_park:
+	drm_mm_for_each_node_safe(hole, next, mm) {
+		if (hole->color != -1ul)
+			continue;
+
+		drm_mm_remove_node(hole);
+		kfree(hole);
+	}
+	mmap_offset_unlock(i915);
 	restore_retire_worker(i915);
 	return err;
 err_obj:
@@ -694,12 +725,258 @@ err_obj:
 	goto out;
 }
 
+#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
+static int igt_mmap(void *arg, enum i915_mmap_type type)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct i915_mmap_offset *mmo;
+	struct vm_area_struct *area;
+	unsigned long addr;
+	void *vaddr;
+	int err = 0, i;
+
+	if (!i915_ggtt_has_aperture(&i915->ggtt))
+		return 0;
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out;
+	}
+	memset(vaddr, POISON_INUSE, PAGE_SIZE);
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo)) {
+		err = PTR_ERR(mmo);
+		goto out;
+	}
+
+	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+	if (IS_ERR_VALUE(addr)) {
+		err = addr;
+		goto out;
+	}
+
+	pr_debug("igt_mmap() @ %lx\n", addr);
+
+	area = find_vma(current->mm, addr);
+	if (!area) {
+		pr_err("Did not create a vm_area_struct for the mmap\n");
+		err = -EINVAL;
+		goto out_unmap;
+	}
+
+	if (area->vm_private_data != mmo) {
+		pr_err("vm_area_struct did not point back to our mmap_offset object!\n");
+		err = -EINVAL;
+		goto out_unmap;
+	}
+
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) {
+		u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
+		u32 x;
+
+		if (get_user(x, ux)) {
+			pr_err("Unable to read from mmap, offset:%zd\n",
+			       i * sizeof(x));
+			err = -EFAULT;
+			break;
+		}
+
+		if (x != expand32(POISON_INUSE)) {
+			pr_err("Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+			       i * sizeof(x), x, expand32(POISON_INUSE));
+			err = -EINVAL;
+			break;
+		}
+
+		x = expand32(POISON_FREE);
+		if (put_user(x, ux)) {
+			pr_err("Unable to write to mmap, offset:%zd\n",
+			       i * sizeof(x));
+			err = -EFAULT;
+			break;
+		}
+	}
+
+out_unmap:
+	vm_munmap(addr, PAGE_SIZE);
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out;
+	}
+	if (err == 0 && memchr_inv(vaddr, POISON_FREE, PAGE_SIZE)) {
+		pr_err("Write via mmap did not land in backing store\n");
+		err = -EINVAL;
+	}
+	i915_gem_object_unpin_map(obj);
+
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_mmap_gtt(void *arg)
+{
+	return igt_mmap(arg, I915_MMAP_TYPE_GTT);
+}
+
+static int igt_mmap_cpu(void *arg)
+{
+	return igt_mmap(arg, I915_MMAP_TYPE_WC);
+}
+
+static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
+{
+	if (!pte_present(*pte) || pte_none(*pte)) {
+		pr_err("missing PTE:%lx\n",
+		       (addr - (unsigned long)data) >> PAGE_SHIFT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
+{
+	if (pte_present(*pte) && !pte_none(*pte)) {
+		pr_err("present PTE:%lx; expected to be revoked\n",
+		       (addr - (unsigned long)data) >> PAGE_SHIFT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_present(unsigned long addr, unsigned long len)
+{
+	return apply_to_page_range(current->mm, addr, len,
+				   check_present_pte, (void *)addr);
+}
+
+static int check_absent(unsigned long addr, unsigned long len)
+{
+	return apply_to_page_range(current->mm, addr, len,
+				   check_absent_pte, (void *)addr);
+}
+
+static int prefault_range(u64 start, u64 len)
+{
+	const char __user *addr, *end;
+	char __maybe_unused c;
+	int err;
+
+	addr = u64_to_user_ptr(start);
+	end = addr + len;
+
+	for (; addr < end; addr += PAGE_SIZE) {
+		err = __get_user(c, addr);
+		if (err)
+			return err;
+	}
+
+	return __get_user(c, end - 1);
+}
+
+static int igt_mmap_revoke(void *arg, enum i915_mmap_type type)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct i915_mmap_offset *mmo;
+	unsigned long addr;
+	int err;
+
+	if (!i915_ggtt_has_aperture(&i915->ggtt))
+		return 0;
+
+	obj = i915_gem_object_create_internal(i915, SZ_4M);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo)) {
+		err = PTR_ERR(mmo);
+		goto out;
+	}
+
+	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+	if (IS_ERR_VALUE(addr)) {
+		err = addr;
+		goto out;
+	}
+
+	err = prefault_range(addr, obj->base.size);
+	if (err)
+		goto out_unmap;
+
+	GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT &&
+		   !atomic_read(&obj->bind_count));
+
+	err = check_present(addr, obj->base.size);
+	if (err)
+		goto out_unmap;
+
+	/*
+	 * After unbinding the object from the GGTT, its address may be reused
+	 * for other objects. Ergo we have to revoke the previous mmap PTE
+	 * access as it no longer points to the same object.
+	 */
+	err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+	if (err) {
+		pr_err("Failed to unbind object!\n");
+		goto out_unmap;
+	}
+	GEM_BUG_ON(atomic_read(&obj->bind_count));
+
+	if (type != I915_MMAP_TYPE_GTT) {
+		__i915_gem_object_put_pages(obj);
+		if (i915_gem_object_has_pages(obj)) {
+			pr_err("Failed to put-pages object!\n");
+			err = -EINVAL;
+			goto out_unmap;
+		}
+	}
+
+	err = check_absent(addr, obj->base.size);
+	if (err)
+		goto out_unmap;
+
+out_unmap:
+	vm_munmap(addr, obj->base.size);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_mmap_gtt_revoke(void *arg)
+{
+	return igt_mmap_revoke(arg, I915_MMAP_TYPE_GTT);
+}
+
+static int igt_mmap_cpu_revoke(void *arg)
+{
+	return igt_mmap_revoke(arg, I915_MMAP_TYPE_WC);
+}
+
 int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_partial_tiling),
 		SUBTEST(igt_smoke_tiling),
 		SUBTEST(igt_mmap_offset_exhaustion),
+		SUBTEST(igt_mmap_gtt),
+		SUBTEST(igt_mmap_cpu),
+		SUBTEST(igt_mmap_gtt_revoke),
+		SUBTEST(igt_mmap_cpu_revoke),
 	};
 
 	return i915_subtests(tests, i915);
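All four new subtests map a GEM object into the selftest's own address space through igt_mmap_node(), declared in the newly included "selftests/igt_mmap.h" but implemented outside this diffstat. A plausible, hedged shape for that helper, inferred from the call sites above (mock_drm_getfile() and the exact error handling are assumptions): it wraps the drm minor in an anonymous struct file, grants that file access to the object's drm_vma_offset_node, and performs vm_mmap() at the node's fake offset:

	unsigned long igt_mmap_node(struct drm_i915_private *i915,
				    struct drm_vma_offset_node *node,
				    unsigned long addr,
				    unsigned long prot,
				    unsigned long flags)
	{
		struct file *file;
		int err;

		/* Pretend to open("/dev/dri/card0"); assumed helper */
		file = mock_drm_getfile(i915->drm.primary, O_RDWR);
		if (IS_ERR(file))
			return PTR_ERR(file);

		/* Allow this drm_file to fault on the node... */
		err = drm_vma_node_allow(node, file->private_data);
		if (err) {
			addr = err;
			goto out_file;
		}

		/* ...and map it at the node's fake mmap offset. */
		addr = vm_mmap(file, addr, drm_vma_node_size(node) << PAGE_SHIFT,
			       prot, flags, drm_vma_node_offset_addr(node));

		drm_vma_node_revoke(node, file->private_data);
	out_file:
		fput(file);
		return addr;
	}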
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index e8132aca0bb6..62077fe46715 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -41,6 +41,7 @@ static int __perf_fill_blt(struct drm_i915_gem_object *obj)
 	if (!engine)
 		return 0;
 
+	intel_engine_pm_get(engine);
 	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
 		struct intel_context *ce = engine->kernel_context;
 		ktime_t t0, t1;
@@ -49,17 +50,20 @@ static int __perf_fill_blt(struct drm_i915_gem_object *obj)
 
 		err = i915_gem_object_fill_blt(obj, ce, 0);
 		if (err)
-			return err;
+			break;
 
 		err = i915_gem_object_wait(obj,
 					   I915_WAIT_ALL,
 					   MAX_SCHEDULE_TIMEOUT);
 		if (err)
-			return err;
+			break;
 
 		t1 = ktime_get();
 		t[pass] = ktime_sub(t1, t0);
 	}
+	intel_engine_pm_put(engine);
+	if (err)
+		return err;
 
 	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
 	pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
@@ -109,6 +113,7 @@ static int __perf_copy_blt(struct drm_i915_gem_object *src,
 	struct intel_engine_cs *engine;
 	ktime_t t[5];
 	int pass;
+	int err = 0;
 
 	engine = intel_engine_lookup_user(i915,
 					  I915_ENGINE_CLASS_COPY,
@@ -116,26 +121,29 @@ static int __perf_copy_blt(struct drm_i915_gem_object *src,
 	if (!engine)
 		return 0;
 
+	intel_engine_pm_get(engine);
 	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
 		struct intel_context *ce = engine->kernel_context;
 		ktime_t t0, t1;
-		int err;
 
 		t0 = ktime_get();
 
 		err = i915_gem_object_copy_blt(src, dst, ce);
 		if (err)
-			return err;
+			break;
 
 		err = i915_gem_object_wait(dst,
 					   I915_WAIT_ALL,
 					   MAX_SCHEDULE_TIMEOUT);
 		if (err)
-			return err;
+			break;
 
 		t1 = ktime_get();
 		t[pass] = ktime_sub(t1, t0);
 	}
+	intel_engine_pm_put(engine);
+	if (err)
+		return err;
 
 	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
 	pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
@@ -186,6 +194,8 @@ err_src:
 
 struct igt_thread_arg {
 	struct drm_i915_private *i915;
+	struct i915_gem_context *ctx;
+	struct file *file;
 	struct rnd_state prng;
 	unsigned int n_cpus;
 };
@@ -198,24 +208,20 @@ static int igt_fill_blt_thread(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_context *ctx;
 	struct intel_context *ce;
-	struct drm_file *file;
 	unsigned int prio;
 	IGT_TIMEOUT(end);
 	int err;
 
-	file = mock_file(i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	ctx = thread->ctx;
+	if (!ctx) {
+		ctx = live_context(i915, thread->file);
+		if (IS_ERR(ctx))
+			return PTR_ERR(ctx);
 
-	ctx = live_context(i915, file);
-	if (IS_ERR(ctx)) {
-		err = PTR_ERR(ctx);
-		goto out_file;
+		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
+		ctx->sched.priority = I915_USER_PRIORITY(prio);
 	}
 
-	prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
-	ctx->sched.priority = I915_USER_PRIORITY(prio);
-
 	ce = i915_gem_context_get_engine(ctx, BCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 
@@ -300,8 +306,6 @@ err_flush:
 		err = 0;
 
 	intel_context_put(ce);
-out_file:
-	mock_file_free(i915, file);
 	return err;
 }
 
@@ -313,24 +317,20 @@ static int igt_copy_blt_thread(void *arg)
 	struct drm_i915_gem_object *src, *dst;
 	struct i915_gem_context *ctx;
 	struct intel_context *ce;
-	struct drm_file *file;
 	unsigned int prio;
 	IGT_TIMEOUT(end);
 	int err;
 
-	file = mock_file(i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	ctx = thread->ctx;
+	if (!ctx) {
+		ctx = live_context(i915, thread->file);
+		if (IS_ERR(ctx))
+			return PTR_ERR(ctx);
 
-	ctx = live_context(i915, file);
-	if (IS_ERR(ctx)) {
-		err = PTR_ERR(ctx);
-		goto out_file;
+		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
+		ctx->sched.priority = I915_USER_PRIORITY(prio);
 	}
 
-	prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
-	ctx->sched.priority = I915_USER_PRIORITY(prio);
-
 	ce = i915_gem_context_get_engine(ctx, BCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 
@@ -431,19 +431,18 @@ err_flush:
 		err = 0;
 
 	intel_context_put(ce);
-out_file:
-	mock_file_free(i915, file);
 	return err;
 }
 
 static int igt_threaded_blt(struct drm_i915_private *i915,
-			    int (*blt_fn)(void *arg))
+			    int (*blt_fn)(void *arg),
+			    unsigned int flags)
+#define SINGLE_CTX BIT(0)
 {
 	struct igt_thread_arg *thread;
 	struct task_struct **tsk;
+	unsigned int n_cpus, i;
 	I915_RND_STATE(prng);
-	unsigned int n_cpus;
-	unsigned int i;
 	int err = 0;
 
 	n_cpus = num_online_cpus() + 1;
@@ -453,13 +452,27 @@ static int igt_threaded_blt(struct drm_i915_private *i915,
 		return 0;
 
 	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
-	if (!thread) {
-		kfree(tsk);
-		return 0;
+	if (!thread)
+		goto out_tsk;
+
+	thread[0].file = mock_file(i915);
+	if (IS_ERR(thread[0].file)) {
+		err = PTR_ERR(thread[0].file);
+		goto out_thread;
+	}
+
+	if (flags & SINGLE_CTX) {
+		thread[0].ctx = live_context(i915, thread[0].file);
+		if (IS_ERR(thread[0].ctx)) {
+			err = PTR_ERR(thread[0].ctx);
+			goto out_file;
+		}
 	}
 
 	for (i = 0; i < n_cpus; ++i) {
 		thread[i].i915 = i915;
+		thread[i].file = thread[0].file;
+		thread[i].ctx = thread[0].ctx;
 		thread[i].n_cpus = n_cpus;
 		thread[i].prng =
 			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
@@ -488,29 +501,42 @@ static int igt_threaded_blt(struct drm_i915_private *i915,
 		put_task_struct(tsk[i]);
 	}
 
-	kfree(tsk);
+out_file:
+	fput(thread[0].file);
+out_thread:
 	kfree(thread);
-
+out_tsk:
+	kfree(tsk);
 	return err;
 }
 
 static int igt_fill_blt(void *arg)
 {
-	return igt_threaded_blt(arg, igt_fill_blt_thread);
+	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
+}
+
+static int igt_fill_blt_ctx0(void *arg)
+{
+	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
 }
 
 static int igt_copy_blt(void *arg)
 {
-	return igt_threaded_blt(arg, igt_copy_blt_thread);
+	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
+}
+
+static int igt_copy_blt_ctx0(void *arg)
+{
+	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
 }
 
 int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(perf_fill_blt),
-		SUBTEST(perf_copy_blt),
 		SUBTEST(igt_fill_blt),
+		SUBTEST(igt_fill_blt_ctx0),
 		SUBTEST(igt_copy_blt),
+		SUBTEST(igt_copy_blt_ctx0),
 	};
 
 	if (intel_gt_is_wedged(&i915->gt))
@@ -521,3 +547,16 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 
 	return i915_live_subtests(tests, i915);
 }
+
+int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_fill_blt),
+		SUBTEST(perf_copy_blt),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_live_subtests(tests, i915);
+}
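With intel_engine_pm_get()/put() now wrapping the measurement loops, perf_fill_blt and perf_copy_blt leave the live selftest list and gain their own i915_gem_object_blt_perf_selftests() entry point. The table that registers that entry point lives outside drivers/gpu/drm/i915/gem/ and is therefore not in this diffstat; by analogy with the existing i915_live_selftests.h mechanism, it is presumably a one-line addition along these lines (file name and placement assumed):

	/* drivers/gpu/drm/i915/selftests/i915_perf_selftests.h (assumed) */
	/* selftest(name, function): one entry per perf selftest suite */
	selftest(blt, i915_gem_object_blt_perf_selftests)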
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 29b8984f0e47..7d7e13dc2fdf 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -5,6 +5,7 @@
  */
 
 #include "mock_context.h"
+#include "selftests/mock_drm.h"
 #include "selftests/mock_gtt.h"
 
 struct i915_gem_context *
@@ -36,9 +37,7 @@ mock_context(struct drm_i915_private *i915,
 	if (name) {
 		struct i915_ppgtt *ppgtt;
 
-		ctx->name = kstrdup(name, GFP_KERNEL);
-		if (!ctx->name)
-			goto err_put;
+		strncpy(ctx->name, name, sizeof(ctx->name));
 
 		ppgtt = mock_ppgtt(i915, name);
 		if (!ppgtt)
@@ -74,7 +73,7 @@ void mock_init_contexts(struct drm_i915_private *i915)
 }
 
 struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file)
+live_context(struct drm_i915_private *i915, struct file *file)
 {
 	struct i915_gem_context *ctx;
 	int err;
@@ -83,7 +82,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = gem_context_register(ctx, file->driver_priv);
+	err = gem_context_register(ctx, to_drm_file(file)->driver_priv);
 	if (err < 0)
 		goto err_ctx;
 
@@ -97,7 +96,16 @@ err_ctx:
 struct i915_gem_context *
 kernel_context(struct drm_i915_private *i915)
 {
-	return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+	struct i915_gem_context *ctx;
+
+	ctx = i915_gem_create_context(i915, 0);
+	if (IS_ERR(ctx))
+		return ctx;
+
+	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
+
+	return ctx;
 }
 
 void kernel_context_close(struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
index 0b926653914f..fb83d2f09212 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -7,6 +7,9 @@
 #ifndef __MOCK_CONTEXT_H
 #define __MOCK_CONTEXT_H
 
+struct file;
+struct drm_i915_private;
+
 void mock_init_contexts(struct drm_i915_private *i915);
 
 struct i915_gem_context *
@@ -16,7 +19,7 @@ mock_context(struct drm_i915_private *i915,
 void mock_context_close(struct i915_gem_context *ctx);
 
 struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
+live_context(struct drm_i915_private *i915, struct file *file);
 
 struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
 void kernel_context_close(struct i915_gem_context *ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
index f0f8bbd82dfc..22818bbb139d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -14,7 +14,7 @@ struct mock_dmabuf {
 	struct page *pages[];
 };
 
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+static inline struct mock_dmabuf *to_mock(struct dma_buf *buf)
 {
 	return buf->priv;
 }
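Two notes on these final hunks. The switch from kstrdup() to strncpy() in mock_context() only makes sense if ctx->name is now a fixed-size array embedded in the context rather than a heap-allocated string; that structure change happens elsewhere in the series. And to_mock() in mock_dmabuf.h must become static inline because a header-defined plain static function is duplicated into every translation unit that includes the header, and any unit that never calls it trips -Wunused-function; static inline lets unused copies be discarded silently. A minimal illustration (file name hypothetical):

	/* some_user.c: includes the header but never calls to_mock() */
	#include "mock_dmabuf.h"

	/*
	 * With the old "static struct mock_dmabuf *to_mock(...)", building
	 * this unit warns: 'to_mock' defined but not used [-Wunused-function].
	 * With "static inline", the unused definition is simply dropped.
	 */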