summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c19
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c8
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/README46
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm119
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm117
6 files changed, 297 insertions, 24 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index e4aece20bc80..52db2bde44a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -204,25 +204,25 @@ static int __ring_active(struct intel_ring *ring)
{
int err;
- err = i915_active_acquire(&ring->vma->active);
+ err = intel_ring_pin(ring);
if (err)
return err;
- err = intel_ring_pin(ring);
+ err = i915_active_acquire(&ring->vma->active);
if (err)
- goto err_active;
+ goto err_pin;
return 0;
-err_active:
- i915_active_release(&ring->vma->active);
+err_pin:
+ intel_ring_unpin(ring);
return err;
}
static void __ring_retire(struct intel_ring *ring)
{
- intel_ring_unpin(ring);
i915_active_release(&ring->vma->active);
+ intel_ring_unpin(ring);
}
__i915_active_call
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7c3d8ef4a47c..cb07e1d2a353 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -5396,13 +5396,8 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
* typically be the first we inspect for submission.
*/
swp = prandom_u32_max(ve->num_siblings);
- if (!swp)
- return;
-
- swap(ve->siblings[swp], ve->siblings[0]);
- if (!intel_engine_has_relative_mmio(ve->siblings[0]))
- virtual_update_register_offsets(ve->context.lrc_reg_state,
- ve->siblings[0]);
+ if (swp)
+ swap(ve->siblings[swp], ve->siblings[0]);
}
static int virtual_context_alloc(struct intel_context *ce)
@@ -5415,15 +5410,9 @@ static int virtual_context_alloc(struct intel_context *ce)
static int virtual_context_pin(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
- int err;
/* Note: we must use a real engine class for setting up reg state */
- err = __execlists_context_pin(ce, ve->siblings[0]);
- if (err)
- return err;
-
- virtual_engine_initial_hint(ve);
- return 0;
+ return __execlists_context_pin(ce, ve->siblings[0]);
}
static void virtual_context_enter(struct intel_context *ce)
@@ -5688,6 +5677,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
+ ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
@@ -5769,6 +5759,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+ virtual_engine_initial_hint(ve);
return &ve->context;
err_put:
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 5049c3dd08a6..c91981e75ebf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -44,9 +44,9 @@ static int cmp_u64(const void *A, const void *B)
{
const u64 *a = A, *b = B;
- if (a < b)
+ if (*a < *b)
return -1;
- else if (a > b)
+ else if (*a > *b)
return 1;
else
return 0;
@@ -56,9 +56,9 @@ static int cmp_u32(const void *A, const void *B)
{
const u32 *a = A, *b = B;
- if (a < b)
+ if (*a < *b)
return -1;
- else if (a > b)
+ else if (*a > *b)
return 1;
else
return 0;
diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README
new file mode 100644
index 000000000000..e7e96d7073c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/README
@@ -0,0 +1,46 @@
+ASM sources for auto generated shaders
+======================================
+
+The i915/gt/hsw_clear_kernel.c and i915/gt/ivb_clear_kernel.c files contain
+pre-compiled batch chunks that will clear any residual render cache during
+context switch.
+
+They are generated from their respective platform ASM files present on
+i915/gt/shaders/clear_kernel directory.
+
+The generated .c files should never be modified directly. Instead, any modification
+needs to be done on the on their respective ASM files and build instructions below
+needes to be followed.
+
+Building
+========
+
+Environment
+-----------
+
+IGT GPU tool scripts and the Mesa's i965 instruction assembler tool are used
+on building.
+
+Please make sure your Mesa tool is compiled with "-Dtools=intel" and
+"-Ddri-drivers=i965", and run this script from IGT source root directory"
+
+The instructions bellow assume:
+ * IGT gpu tools source code is located on your home directory (~) as ~/igt
+ * Mesa source code is located on your home directory (~) as ~/mesa
+ and built under the ~/mesa/build directory
+ * Linux kernel source code is under your home directory (~) as ~/linux
+
+Instructions
+------------
+
+~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm \
+ ~/igt/lib/i915/shaders/clear_kernel/ivb.asm
+~ $ cd ~/igt
+igt $ ./scripts/generate_clear_kernel.sh -g ivb \
+ -m ~/mesa/build/src/intel/tools/i965_asm
+
+~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm \
+ ~/igt/lib/i915/shaders/clear_kernel/hsw.asm
+~ $ cd ~/igt
+igt $ ./scripts/generate_clear_kernel.sh -g hsw \
+ -m ~/mesa/build/src/intel/tools/i965_asm \ No newline at end of file
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
new file mode 100644
index 000000000000..5fdf384bb621
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Kernel for PAVP buffer clear.
+ *
+ * 1. Clear all 64 GRF registers assigned to the kernel with designated value;
+ * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears
+ * 512 bytes of Render Cache.
+ */
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/**
+ * Curbe Format
+ *
+ * DW 1.0 - Block Offset to write Render Cache
+ * DW 1.1 [15:0] - Clear Word
+ * DW 1.2 - Delay iterations
+ * DW 1.3 - Enable Instrumentation (only for debug)
+ * DW 1.4 - Rsvd (intended for context ID)
+ * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
+ * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ *
+ * Binding Table
+ *
+ * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
+ * BTI 1: Wait/Instrumentation Buffer
+ * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
+ * Expected to be initialized to 0 by driver/another kernel
+ * Layout:
+ * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
+ * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
+ */
+add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
+cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N };
+(+f0.0) jmpi(1) 352D { align1 WE_all 1N };
+
+/**
+ * State Register has info on where this thread is running
+ * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ */
+mov(8) g3<1>UD 0x00000000UD { align1 1Q };
+shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N };
+and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */
+shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N };
+and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */
+mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N };
+add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
+shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N };
+and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */
+mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N };
+add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */
+
+mov(8) g5<1>UD 0x00000000UD { align1 1Q };
+and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N };
+mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N };
+
+mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */
+mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */
+and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N };
+
+/* Media block read to fetch current value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x02190001
+
+ render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
+add(1) g5<1>D g5<0,1,0>D 1D { align1 1N };
+
+/* Media block write for updated value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
+
+/* Delay thread for specified parameter */
+add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N };
+(+f0.0) jmpi(1) -32D { align1 WE_all 1N };
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/* Initialize looping parameters */
+mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */
+mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */
+
+/* Write 32x16 all "0" block */
+mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q };
+mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q };
+mov(2) g2<1>UD g1<2,2,1>UW { align1 1N };
+mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */
+and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N };
+mov(16) g3<1>UD 0x00000000UD { align1 1H };
+mov(16) g4<1>UD 0x00000000UD { align1 1H };
+mov(16) g5<1>UD 0x00000000UD { align1 1H };
+mov(16) g6<1>UD 0x00000000UD { align1 1H };
+mov(16) g7<1>UD 0x00000000UD { align1 1H };
+mov(16) g8<1>UD 0x00000000UD { align1 1H };
+mov(16) g9<1>UD 0x00000000UD { align1 1H };
+mov(16) g10<1>UD 0x00000000UD { align1 1H };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+
+/* Now, clear all GRF registers */
+add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N };
+mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H };
+add(1) a0<1>D a0<0,1,0>D 32D { align1 1N };
+(+f0.0) jmpi(1) -64D { align1 WE_all 1N };
+
+/* Terminante the thread */
+sendc(8) null<1>UD g127<8,8,1>F 0x82000010
+ thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT };
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
new file mode 100644
index 000000000000..97c7ac9e3854
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Kernel for PAVP buffer clear.
+ *
+ * 1. Clear all 64 GRF registers assigned to the kernel with designated value;
+ * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears
+ * 512 bytes of Render Cache.
+ */
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/**
+ * Curbe Format
+ *
+ * DW 1.0 - Block Offset to write Render Cache
+ * DW 1.1 [15:0] - Clear Word
+ * DW 1.2 - Delay iterations
+ * DW 1.3 - Enable Instrumentation (only for debug)
+ * DW 1.4 - Rsvd (intended for context ID)
+ * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
+ * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ *
+ * Binding Table
+ *
+ * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
+ * BTI 1: Wait/Instrumentation Buffer
+ * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
+ * Expected to be initialized to 0 by driver/another kernel
+ * Layout :
+ * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
+ * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
+ */
+add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
+cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N };
+(+f0.0) jmpi(1) 44D { align1 WE_all 1N };
+
+/**
+ * State Register has info on where this thread is running
+ * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ */
+mov(8) g3<1>UD 0x00000000UD { align1 1Q };
+shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N };
+and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */
+shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N };
+and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */
+mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N };
+add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
+shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N };
+and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */
+mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N };
+add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */
+
+mov(8) g5<1>UD 0x00000000UD { align1 1Q };
+and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N };
+mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N };
+
+mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */
+mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */
+and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N };
+
+/* Media block read to fetch current value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x02190001
+ render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
+add(1) g5<1>D g5<0,1,0>D 1D { align1 1N };
+
+/* Media block write for updated value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
+/* Delay thread for specified parameter */
+add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N };
+(+f0.0) jmpi(1) -4D { align1 WE_all 1N };
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/* Initialize looping parameters */
+mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */
+mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */
+
+/* Write 32x16 all "0" block */
+mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q };
+mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q };
+mov(2) g2<1>UD g1<2,2,1>UW { align1 1N };
+mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */
+and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N };
+mov(16) g3<1>UD 0x00000000UD { align1 1H };
+mov(16) g4<1>UD 0x00000000UD { align1 1H };
+mov(16) g5<1>UD 0x00000000UD { align1 1H };
+mov(16) g6<1>UD 0x00000000UD { align1 1H };
+mov(16) g7<1>UD 0x00000000UD { align1 1H };
+mov(16) g8<1>UD 0x00000000UD { align1 1H };
+mov(16) g9<1>UD 0x00000000UD { align1 1H };
+mov(16) g10<1>UD 0x00000000UD { align1 1H };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+
+/* Now, clear all GRF registers */
+add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N };
+mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H };
+add(1) a0<1>D a0<0,1,0>D 32D { align1 1N };
+(+f0.0) jmpi(1) -8D { align1 WE_all 1N };
+
+/* Terminante the thread */
+sendc(8) null<1>UD g127<8,8,1>F 0x82000010
+ thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT };