drm/i915: Emit to ringbuffer directly

This removes the usage of intel_ring_emit in favour of directly writing to the ring buffer. intel_ring_emit was preventing the compiler from optimising the fetch and increment of the current ring buffer pointer and therefore generating very verbose code for every write. It had no useful purpose since all ringbuffer operations are started and ended with intel_ring_begin and intel_ring_advance respectively, with no bail out in the middle possible, so it is fine to increment the tail in intel_ring_begin and let the code manage the pointer itself. Useless instruction removal amounts to approximately two and a half kilobytes of saved text on my build. Not sure if this has any measurable performance implications but executing a ton of useless instructions on fast paths cannot be good. v2: * Change return from intel_ring_begin to error pointer by popular demand. * Move tail increment to intel_ring_advance to enable some error checking. v3: * Move tail advance back into intel_ring_begin. * Rebase and tidy. v4: * Complete rebase after a few months since v3. v5: * Remove unnecessary cast and fix !debug compile. (Chris Wilson) v6: * Make intel_ring_offset take request as well. * Fix recording of request postfix plus a sprinkle of asserts. (Chris Wilson) v7: * Use intel_ring_offset to get the postfix. (Chris Wilson) * Convert GVT code as well. v8: * Rename *out++ to *cs++. v9: * Fix GVT out to cs conversion in GVT. v10: * Rebase for new intel_ring_begin in selftests. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Zhi Wang <zhi.a.wang@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170214113242.29241-1-tvrtko.ursulin@linux.intel.com
author: Tvrtko Ursulin <tvrtko.ursulin@intel.com> 2017-02-14 11:32:42 +0000
committer: Tvrtko Ursulin <tvrtko.ursulin@intel.com> 2017-02-14 14:30:46 +0000
commit: 73dec95e6ba37d8138bb111be5c9b8a1f3a622ae (patch)
tree: b1051f0903c87d4a1ca9c5100af380c0683a4a70 /drivers/gpu/drm/i915/gvt/cmd_parser.c
parent: d2d1501625e96170958f38646a9fcc9b69bbc2df (diff)
1 files changed, 15 insertions, 21 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 9a4b23c3ee97..c3d44c03157c 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1513,7 +1513,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
 		len += copy_len;
 		gma += copy_len;
 	}
-	return 0;
+	return len;
 }
 
 
@@ -1630,7 +1630,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
 			      gma, gma + bb_size,
 			      dst);
-	if (ret) {
+	if (ret < 0) {
 		gvt_err("fail to copy guest ring buffer\n");
 		goto unmap_src;
 	}
@@ -2594,11 +2594,8 @@ out:
 static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
-	struct intel_ring *ring = shadow_ctx->engine[ring_id].ring;
 	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
-	unsigned int copy_len = 0;
+	u32 *cs;
 	int ret;
 
 	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2612,36 +2609,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 	gma_top = workload->rb_start + guest_rb_size;
 
 	/* allocate shadow ring buffer */
-	ret = intel_ring_begin(workload->req, workload->rb_len / 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* get shadow ring buffer va */
-	workload->shadow_ring_buffer_va = ring->vaddr + ring->tail;
+	workload->shadow_ring_buffer_va = cs;
 
 	/* head > tail --> copy head <-> top */
 	if (gma_head > gma_tail) {
 		ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-				gma_head, gma_top,
-				workload->shadow_ring_buffer_va);
-		if (ret) {
+				      gma_head, gma_top, cs);
+		if (ret < 0) {
 			gvt_err("fail to copy guest ring buffer\n");
 			return ret;
 		}
-		copy_len = gma_top - gma_head;
+		cs += ret / sizeof(u32);
 		gma_head = workload->rb_start;
 	}
 
 	/* copy head or start <-> tail */
-	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-			gma_head, gma_tail,
-			workload->shadow_ring_buffer_va + copy_len);
-	if (ret) {
+	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
+	if (ret < 0) {
 		gvt_err("fail to copy guest ring buffer\n");
 		return ret;
 	}
-	ring->tail += workload->rb_len;
-	intel_ring_advance(ring);
+	cs += ret / sizeof(u32);
+	intel_ring_advance(workload->req, cs);
 	return 0;
 }
 
@@ -2695,7 +2689,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 				wa_ctx->workload->vgpu->gtt.ggtt_mm,
 				guest_gma, guest_gma + ctx_size,
 				map);
-	if (ret) {
+	if (ret < 0) {
 		gvt_err("fail to copy guest indirect ctx\n");
 		goto unmap_src;
 	}
author	Tvrtko Ursulin <tvrtko.ursulin@intel.com>	2017-02-14 11:32:42 +0000
committer	Tvrtko Ursulin <tvrtko.ursulin@intel.com>	2017-02-14 14:30:46 +0000
commit	73dec95e6ba37d8138bb111be5c9b8a1f3a622ae (patch)
tree	b1051f0903c87d4a1ca9c5100af380c0683a4a70 /drivers/gpu/drm/i915/gvt/cmd_parser.c
parent	d2d1501625e96170958f38646a9fcc9b69bbc2df (diff)