drm/i915/selftests: Unroll the CS frequency loop

Having noticed that MI_BB_START is incurring a memory stall (see the correlation with uncore frequency), we have to unroll the loop in order to diminish the impact of the MI_BB_START on the instruction throughput.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200421171351.19575-1-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2020-04-21 18:13:51 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2020-04-21 20:48:45 +0100
commit: 33883310cd8ed365a4279600b329c50992e8f528 (patch)
tree: d8a2317f9b7a5f17a6de85c3461b8db0a6e6768e /drivers/gpu/drm/i915/gt/selftest_rps.c
parent: bd3ec9e75893dacfa17f37c7f2bf1c7ed73d4043 (diff)
1 files changed, 20 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index e2afc2003caa..0d7ed000aff0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -49,14 +49,17 @@ create_spin_counter(struct intel_engine_cs *engine,
 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	unsigned long end;
 	u32 *base, *cs;
 	int loop, i;
 	int err;
 
-	obj = i915_gem_object_create_internal(vm->i915, 4096);
+	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
+	end = obj->base.size / sizeof(u32) - 1;
+
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
 		i915_gem_object_put(obj);
@@ -90,27 +93,31 @@ create_spin_counter(struct intel_engine_cs *engine,
 
 	loop = cs - base;
 
-	*cs++ = MI_MATH(4);
-	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
-	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
-	*cs++ = MI_MATH_ADD;
-	*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
-
-	if (srm) {
-		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
-		*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
-		*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
-		*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
+	for (i = 0; i < 1024; i++) {
+		*cs++ = MI_MATH(4);
+		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+		*cs++ = MI_MATH_ADD;
+		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+		if (srm) {
+			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
+			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
+			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
+		}
 	}
 
 	*cs++ = MI_BATCH_BUFFER_START_GEN8;
 	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
 	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+	GEM_BUG_ON(cs - base > end);
 
 	i915_gem_object_flush_map(obj);
 
 	*cancel = base + loop;
-	*counter = srm ? memset32(base + 1000, 0, 1) : NULL;
+	*counter = srm ? memset32(base + end, 0, 1) : NULL;
 	return vma;
 }
author	Chris Wilson <chris@chris-wilson.co.uk>	2020-04-21 18:13:51 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2020-04-21 20:48:45 +0100
commit	33883310cd8ed365a4279600b329c50992e8f528 (patch)
tree	d8a2317f9b7a5f17a6de85c3461b8db0a6e6768e /drivers/gpu/drm/i915/gt/selftest_rps.c
parent	bd3ec9e75893dacfa17f37c7f2bf1c7ed73d4043 (diff)