summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_dmabuf.c
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-10-28 13:58:44 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-10-28 20:53:50 +0100
commit: d07f0e59b2c762584478920cd2d11fba2980a94a (patch)
tree: a9fc0cade87570bdec356493e6744ee5223bdaaf /drivers/gpu/drm/i915/i915_gem_dmabuf.c
parent: f0cd518206e1a47e57bc251e1faba9d38eadcc59 (diff)
drm/i915: Move GEM activity tracking into a common struct reservation_object
In preparation to support many distinct timelines, we need to expand the activity tracking on the GEM object to handle more than just a request per engine. We already use the struct reservation_object on the dma-buf to handle many fence contexts, so integrating that into the GEM object itself is the preferred solution. (For example, we can now share the same reservation_object between every consumer/producer using this buffer and skip the manual import/export via dma-buf.)

v2: Reimplement busy-ioctl (by walking the reservation object), postpone the ABI change for another day. Similarly use the reservation object to find the last_write request (if active and from i915) for choosing display CS flips.

Caveats:

* busy-ioctl: busy-ioctl only reports on the native fences, it will not warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is being rendered to by external fences. It also will not report the same busy state as wait-ioctl (or polling on the dma-buf) in the same circumstances. On the plus side, it does retain reporting of which *i915* engines are engaged with this object.

* non-blocking atomic modesets take a step backwards as the wait for render completion blocks the ioctl. This is fixed in a subsequent patch to use a fence instead for awaiting on the rendering, see "drm/i915: Restore nonblocking awaits for modesetting"

* dynamic array manipulation for shared-fences in reservation is slower than the previous lockless static assignment (e.g. gem_exec_lut_handle runtime on ivb goes from 42s to 66s), mainly due to atomic operations (maintaining the fence refcounts).

* loss of object-level retirement callbacks, emulated by VMA retirement tracking.

* minor loss of object-level last activity information from debugfs, could be replaced with per-vma information if desired

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_dmabuf.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 53
1 files changed, 3 insertions, 50 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 4d45f20d11ed..5e38299b5df6 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -211,60 +211,17 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
.end_cpu_access = i915_gem_end_cpu_access,
};
-static void export_fences(struct drm_i915_gem_object *obj,
- struct dma_buf *dma_buf)
-{
- struct reservation_object *resv = dma_buf->resv;
- struct drm_i915_gem_request *req;
- unsigned long active;
- int idx;
-
- active = __I915_BO_ACTIVE(obj);
- if (!active)
- return;
-
- /* Serialise with execbuf to prevent concurrent fence-loops */
- mutex_lock(&obj->base.dev->struct_mutex);
-
- /* Mark the object for future fences before racily adding old fences */
- obj->base.dma_buf = dma_buf;
-
- ww_mutex_lock(&resv->lock, NULL);
-
- for_each_active(active, idx) {
- req = i915_gem_active_get(&obj->last_read[idx],
- &obj->base.dev->struct_mutex);
- if (!req)
- continue;
-
- if (reservation_object_reserve_shared(resv) == 0)
- reservation_object_add_shared_fence(resv, &req->fence);
-
- i915_gem_request_put(req);
- }
-
- req = i915_gem_active_get(&obj->last_write,
- &obj->base.dev->struct_mutex);
- if (req) {
- reservation_object_add_excl_fence(resv, &req->fence);
- i915_gem_request_put(req);
- }
-
- ww_mutex_unlock(&resv->lock);
- mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gem_obj, int flags)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
- struct dma_buf *dma_buf;
exp_info.ops = &i915_dmabuf_ops;
exp_info.size = gem_obj->size;
exp_info.flags = flags;
exp_info.priv = gem_obj;
+ exp_info.resv = obj->resv;
if (obj->ops->dmabuf_export) {
int ret = obj->ops->dmabuf_export(obj);
@@ -272,12 +229,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
return ERR_PTR(ret);
}
- dma_buf = drm_gem_dmabuf_export(dev, &exp_info);
- if (IS_ERR(dma_buf))
- return dma_buf;
-
- export_fences(obj, dma_buf);
- return dma_buf;
+ return drm_gem_dmabuf_export(dev, &exp_info);
}
static struct sg_table *
@@ -335,6 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
obj->base.import_attach = attach;
+ obj->resv = dma_buf->resv;
/* We use GTT as shorthand for a coherent domain, one that is
* neither in the GPU cache nor in the CPU cache, where all