author	Chris Wilson <chris@chris-wilson.co.uk>	2016-08-18 17:17:18 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2016-08-18 22:37:01 +0100
commit	52a42cec4b7088599a9f51187c454d45c460167a (patch)
tree	e8f98dd1ea9eacf871a61e081f0130f28330463a /drivers/gpu/drm/i915/i915_cmd_parser.c
parent	76ff480ec9633d689a14e15bc0e3d10a84e6853b (diff)
drm/i915/cmdparser: Accelerate copies from WC memory
If we need to use clflush to prepare our batch for reads from memory, we
can bypass the cache instead by using non-temporal copies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-39-chris@chris-wilson.co.uk
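For illustration, the idea behind the fast path is that write-combined memory
can be read with non-temporal (streaming) loads, so the stale CPU cache never
has to be flushed first. Below is a minimal userspace-style sketch of that
technique using the SSE4.1 MOVNTDQA intrinsic; wc_copy_16aligned() and its
16-byte alignment assumption are illustrative only, not the kernel's
i915_memcpy_from_wc():

#include <smmintrin.h>	/* SSE4.1: _mm_stream_load_si128 (MOVNTDQA) */
#include <stddef.h>

/* Sketch: copy 'len' bytes from WC memory with streaming loads, bypassing
 * the CPU cache so no clflush is needed before reading. Assumes dst, src
 * and len are all 16-byte aligned, mirroring the ALIGN(batch_len, 16)
 * in the patch below.
 */
static void wc_copy_16aligned(void *dst, const void *src, size_t len)
{
	__m128i *d = (__m128i *)dst;
	__m128i *s = (__m128i *)src;	/* cast away const for the intrinsic */

	for (; len >= 16; len -= 16)
		_mm_store_si128(d++, _mm_stream_load_si128(s++));
}

In the kernel the streaming copy also has to bracket its SSE usage with FPU
state save/restore, which is one reason it lives behind the
i915_memcpy_from_wc() helper rather than being open-coded in the parser.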
Diffstat (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_cmd_parser.c	70
1 file changed, 43 insertions, 27 deletions
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index e128e3ab8452..3c72b3b103e7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -965,8 +965,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
{
unsigned int src_needs_clflush;
unsigned int dst_needs_clflush;
- void *dst, *ptr;
- int offset, n;
+ void *dst, *src;
int ret;
ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -983,31 +982,48 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
if (IS_ERR(dst))
goto unpin_dst;
- ptr = dst;
- offset = offset_in_page(batch_start_offset);
-
- /* We can avoid clflushing partial cachelines before the write if we
- * only every write full cache-lines. Since we know that both the
- * source and destination are in multiples of PAGE_SIZE, we can simply
- * round up to the next cacheline. We don't care about copying too much
- * here as we only validate up to the end of the batch.
- */
- if (dst_needs_clflush & CLFLUSH_BEFORE)
- batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
-
- for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
- int len = min_t(int, batch_len, PAGE_SIZE - offset);
- void *vaddr;
-
- vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
- if (src_needs_clflush)
- drm_clflush_virt_range(vaddr + offset, len);
- memcpy(ptr, vaddr + offset, len);
- kunmap_atomic(vaddr);
-
- ptr += len;
- batch_len -= len;
- offset = 0;
+ src = ERR_PTR(-ENODEV);
+ if (src_needs_clflush &&
+ i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
+ src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
+ if (!IS_ERR(src)) {
+ i915_memcpy_from_wc(dst,
+ src + batch_start_offset,
+ ALIGN(batch_len, 16));
+ i915_gem_object_unpin_map(src_obj);
+ }
+ }
+ if (IS_ERR(src)) {
+ void *ptr;
+ int offset, n;
+
+ offset = offset_in_page(batch_start_offset);
+
+ /* We can avoid clflushing partial cachelines before the write
+ * if we only every write full cache-lines. Since we know that
+ * both the source and destination are in multiples of
+ * PAGE_SIZE, we can simply round up to the next cacheline.
+ * We don't care about copying too much here as we only
+ * validate up to the end of the batch.
+ */
+ if (dst_needs_clflush & CLFLUSH_BEFORE)
+ batch_len = roundup(batch_len,
+ boot_cpu_data.x86_clflush_size);
+
+ ptr = dst;
+ for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+ int len = min_t(int, batch_len, PAGE_SIZE - offset);
+
+ src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+ if (src_needs_clflush)
+ drm_clflush_virt_range(src + offset, len);
+ memcpy(ptr, src + offset, len);
+ kunmap_atomic(src);
+
+ ptr += len;
+ batch_len -= len;
+ offset = 0;
+ }
}
/* dst_obj is returned with vmap pinned */
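One detail worth calling out in the new fast path above: i915_memcpy_from_wc()
is expected to perform the copy only when source, destination and length are
all 16-byte aligned (and the CPU supports MOVNTDQA), so the zero-length call
that passes just batch_start_offset acts as a cheap "would the accelerated
path work here?" probe before pinning a WC mapping. A hedged sketch of that
contract, with memcpy() standing in for the streaming-load loop:

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch only: models the contract the patch relies on, not the kernel code.
 * The helper refuses anything that is not 16-byte aligned, so calling it
 * with len == 0 is a cheap capability/alignment probe.
 */
static bool memcpy_from_wc_sketch(void *dst, const void *src, size_t len)
{
	/* Reject misaligned dst/src/len; the caller then takes the
	 * kmap_atomic() + clflush fallback shown in the hunk above. */
	if (((uintptr_t)dst | (uintptr_t)src | len) & 15)
		return false;

	/* memcpy() stands in for the non-temporal MOVNTDQA loop here. */
	if (len)
		memcpy(dst, src, len);

	return true;	/* len == 0: probe succeeded without copying anything */
}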