summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-08-18 17:17:12 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2016-08-18 22:36:59 +0100
commit0b5372727be37944239100ff05a63df9771c8484 (patch)
treeedbad8a45469452c799f030ff601073b7ee9c346 /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent068715b922a6f87c454cdfa15bb8049d2076eee6 (diff)
drm/i915/cmdparser: Use cached vmappings
The single largest factor in the overhead of parsing the commands is the setup of the virtual mapping to provide a continuous block for the batch buffer. If we keep those vmappings around (against the better judgement of mm/vmalloc.c, which we offset by handwaving and looking suggestively at the shrinker) we can dramatically improve the performance of the parser for small batches (such as media workloads). Furthermore, we can use the prepare shmem read/write functions to determine how best we need to clflush the range (rather than every page of the object). The impact of caching both src/dst vmaps is +80% on ivb and +140% on byt for the throughput on small batches. (Caching just the dst vmap and iterating over the src, doing a page by page copy is roughly 5% slower on both platforms. That may be an acceptable trade-off to eliminate one cached vmapping, and we may be able to reduce the per-page copying overhead further.) For *this* simple test case, the cmdparser is now within a factor of 2 of ideal performance. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.william.auld@gmail.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-33-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c8
1 files changed, 7 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 907386630e26..4192066ff60e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1512,7 +1512,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
params->args_batch_start_offset;
if (exec_len == 0)
- exec_len = params->batch->size;
+ exec_len = params->batch->size - params->args_batch_start_offset;
ret = params->engine->emit_bb_start(params->request,
exec_start, exec_len,
@@ -1738,6 +1738,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = -EINVAL;
goto err;
}
+ if (args->batch_start_offset > params->batch->size ||
+ args->batch_len > params->batch->size - args->batch_start_offset) {
+ DRM_DEBUG("Attempting to use out-of-bounds batch\n");
+ ret = -EINVAL;
+ goto err;
+ }
params->args_batch_start_offset = args->batch_start_offset;
if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {