diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-10-28 13:58:27 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-10-28 20:53:43 +0100 |
commit | e95433c73a11759203af1cae5958f998c9673370 (patch) | |
tree | 021dc483a7beff3d2924089cf1c6d20105f11b8a /drivers/gpu/drm/i915/i915_gem.c | |
parent | c92ac094a9c647ebcff56c0535f865bc7f2aa052 (diff) |
drm/i915: Rearrange i915_wait_request() accounting with callers
Our low-level wait routine has evolved from our generic wait interface
that handled unlocked waits, RPS boosting, and waits with time tracking. If we
push our GEM fence tracking to use reservation_objects (required for
handling multiple timelines), we lose the ability to pass the required
information down to i915_wait_request(). However, if we push the extra
functionality from i915_wait_request() to the individual callsites
(i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use
of those extras, we can both simplify our low-level wait and prepare for
extending the GEM interface for use of reservation_objects.
v2: Rewrite i915_wait_request() kerneldocs
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 309 |
1 files changed, 231 insertions, 78 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1254143ab121..537f502123ea 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -292,7 +292,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -311,88 +316,172 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. - * @obj: i915 gem object - * @readonly: waiting for just read access or read-write access - */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) +static long +i915_gem_object_wait_fence(struct dma_fence *fence, + unsigned int flags, + long timeout, + struct intel_rps_client *rps) { - struct reservation_object *resv; - struct i915_gem_active *active; - unsigned long active_mask; - int idx; + struct drm_i915_gem_request *rq; - lockdep_assert_held(&obj->base.dev->struct_mutex); + BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); - if (!readonly) { - active = obj->last_read; - active_mask = i915_gem_object_get_active(obj); - } else { - active_mask = 1; - active = &obj->last_write; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return timeout; + + if (!dma_fence_is_i915(fence)) + return dma_fence_wait_timeout(fence, + flags & I915_WAIT_INTERRUPTIBLE, + timeout); + + rq = to_request(fence); + if (i915_gem_request_completed(rq)) + goto out; + + /* This client is about to stall waiting for the GPU. 
In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we wait. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). Not all clients even want their results + * immediately and for them we should just let the GPU select its own + * frequency to maximise efficiency. To prevent a single client from + * forcing the clocks too high for the whole system, we only allow + * each client to waitboost once in a busy period. + */ + if (rps) { + if (INTEL_GEN(rq->i915) >= 6) + gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies); + else + rps = NULL; } - for_each_active(active_mask, idx) { + timeout = i915_wait_request(rq, flags, timeout); + +out: + if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) + i915_gem_request_retire_upto(rq); + + if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled. + * Since no other client has submitted a request in the + * meantime, assume that this client is the only one + * supplying work to the GPU but is unable to keep that + * work supplied because it is waiting. Since the GPU is + * then never kept fully busy, RPS autoclocking will + * keep the clocks relatively low, causing further delays. + * Compensate by giving the synchronous client credit for + * a waitboost next time. 
+ */ + spin_lock(&rq->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&rq->i915->rps.client_lock); + } + + return timeout; +} + +static long +i915_gem_object_wait_reservation(struct reservation_object *resv, + unsigned int flags, + long timeout, + struct intel_rps_client *rps) +{ + struct dma_fence *excl; + + if (flags & I915_WAIT_ALL) { + struct dma_fence **shared; + unsigned int count, i; int ret; - ret = i915_gem_active_wait(&active[idx], - &obj->base.dev->struct_mutex); + ret = reservation_object_get_fences_rcu(resv, + &excl, &count, &shared); if (ret) return ret; - } - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - long err; + for (i = 0; i < count; i++) { + timeout = i915_gem_object_wait_fence(shared[i], + flags, timeout, + rps); + if (timeout <= 0) + break; + + dma_fence_put(shared[i]); + } - err = reservation_object_wait_timeout_rcu(resv, !readonly, true, - MAX_SCHEDULE_TIMEOUT); - if (err < 0) - return err; + for (; i < count; i++) + dma_fence_put(shared[i]); + kfree(shared); + } else { + excl = reservation_object_get_excl_rcu(resv); } - return 0; + if (excl && timeout > 0) + timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + + dma_fence_put(excl); + + return timeout; } -/* A nonblocking variant of the above wait. Must be called prior to - * acquiring the mutex for the object, as the object state may change - * during this call. A reference must be held by the caller for the object. 
+/** + * Waits for rendering to the object to be completed + * @obj: i915 gem object + * @flags: how to wait (under a lock, for all rendering or just for writes etc) + * @timeout: how long to wait + * @rps: client (user process) to charge for any waitboosting */ -static __must_check int -__unsafe_wait_rendering(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) +int +i915_gem_object_wait(struct drm_i915_gem_object *obj, + unsigned int flags, + long timeout, + struct intel_rps_client *rps) { + struct reservation_object *resv; struct i915_gem_active *active; unsigned long active_mask; int idx; - active_mask = __I915_BO_ACTIVE(obj); - if (!active_mask) - return 0; + might_sleep(); +#if IS_ENABLED(CONFIG_LOCKDEP) + GEM_BUG_ON(debug_locks && + !!lockdep_is_held(&obj->base.dev->struct_mutex) != + !!(flags & I915_WAIT_LOCKED)); +#endif + GEM_BUG_ON(timeout < 0); - if (!readonly) { + if (flags & I915_WAIT_ALL) { active = obj->last_read; + active_mask = i915_gem_object_get_active(obj); } else { active_mask = 1; active = &obj->last_write; } for_each_active(active_mask, idx) { - int ret; - - ret = i915_gem_active_wait_unlocked(&active[idx], - I915_WAIT_INTERRUPTIBLE, - NULL, rps); - if (ret) - return ret; + struct drm_i915_gem_request *request; + + request = i915_gem_active_get_unlocked(&active[idx]); + if (request) { + timeout = i915_gem_object_wait_fence(&request->fence, + flags, timeout, + rps); + i915_gem_request_put(request); + } + if (timeout < 0) + return timeout; } - return 0; + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) + timeout = i915_gem_object_wait_reservation(resv, + flags, timeout, + rps); + return timeout < 0 ? 
timeout : 0; } static struct intel_rps_client *to_rps_client(struct drm_file *file) @@ -449,12 +538,18 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; void *vaddr = obj->phys_handle->vaddr + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); - int ret = 0; + int ret; /* We manually control the domain here and pretend that it * remains coherent i.e. in the GTT domain, like shmem_pwrite. */ - ret = i915_gem_object_wait_rendering(obj, false); + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file_priv)); if (ret) return ret; @@ -614,12 +709,17 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, { int ret; - *needs_clflush = 0; + lockdep_assert_held(&obj->base.dev->struct_mutex); + *needs_clflush = 0; if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_wait_rendering(obj, true); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -661,11 +761,18 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, { int ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); + *needs_clflush = 0; if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -1051,7 +1158,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pread(obj, args->offset, args->size); - ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) goto err; @@ -1449,7 +1559,11 @@ 
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pwrite(obj, args->offset, args->size); - ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) goto err; @@ -1536,7 +1650,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. */ - ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + (write_domain ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) goto err; @@ -1772,7 +1890,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) * repeat the flush holding the lock in the normal manner to catch cases * where we are gazumped. */ - ret = __unsafe_wait_rendering(obj, NULL, !write); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) goto err; @@ -2817,6 +2938,17 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) mutex_unlock(&obj->base.dev->struct_mutex); } +static unsigned long to_wait_timeout(s64 timeout_ns) +{ + if (timeout_ns < 0) + return MAX_SCHEDULE_TIMEOUT; + + if (timeout_ns == 0) + return 0; + + return nsecs_to_jiffies_timeout(timeout_ns); +} + /** * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT * @dev: drm device pointer @@ -2845,10 +2977,9 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; - struct intel_rps_client *rps = to_rps_client(file); struct drm_i915_gem_object *obj; - unsigned long active; - int idx, ret = 0; + ktime_t start; + long ret; if (args->flags != 0) return -EINVAL; @@ -2857,14 +2988,17 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct 
drm_file *file) if (!obj) return -ENOENT; - active = __I915_BO_ACTIVE(obj); - for_each_active(active, idx) { - s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL; - ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], - I915_WAIT_INTERRUPTIBLE, - timeout, rps); - if (ret) - break; + start = ktime_get(); + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, + to_wait_timeout(args->timeout_ns), + to_rps_client(file)); + + if (args->timeout_ns > 0) { + args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); + if (args->timeout_ns < 0) + args->timeout_ns = 0; } i915_gem_object_put_unlocked(obj); @@ -3283,7 +3417,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - ret = i915_gem_object_wait_rendering(obj, !write); + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -3400,7 +3540,12 @@ restart: * If we wait upon the object, we know that all the bound * VMA are no longer active. */ - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -3647,7 +3792,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - ret = i915_gem_object_wait_rendering(obj, !write); + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? 
I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -3703,7 +3854,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) struct drm_i915_file_private *file_priv = file->driver_priv; unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_gem_request *request, *target = NULL; - int ret; + long ret; /* ABI: return -EIO if already wedged */ if (i915_terminally_wedged(&dev_priv->gpu_error)) @@ -3730,10 +3881,12 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (target == NULL) return 0; - ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL); + ret = i915_wait_request(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); i915_gem_request_put(target); - return ret; + return ret < 0 ? ret : 0; } static bool |