Diffstat (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 200
1 file changed, 114 insertions(+), 86 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 372354d33f55..7654f1be8d3b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -25,10 +25,20 @@
*
*/
+#include <linux/highmem.h>
+
+#include <drm/drm_cache.h>
+#include <drm/drm_print.h>
+
#include "gt/intel_engine.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "i915_cmd_parser.h"
#include "i915_drv.h"
#include "i915_memcpy.h"
+#include "i915_reg.h"
/**
* DOC: batch buffer command parser
@@ -590,6 +600,10 @@ struct drm_i915_reg_descriptor {
{ .addr = _reg(idx) }, \
{ .addr = _reg ## _UDW(idx) }
+#define REG64_BASE_IDX(_reg, base, idx) \
+ { .addr = _reg(base, idx) }, \
+ { .addr = _reg ## _UDW(base, idx) }
+
static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG64(GPGPU_THREADS_DISPATCHED),
REG64(HS_INVOCATION_COUNT),
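The new REG64_BASE_IDX helper follows the REG64/REG64_IDX pattern above, emitting a pair of 32-bit descriptors for a 64-bit register addressed by both an engine base and an index. As a sketch of the expansion (the _UDW suffix names the upper dword, per the macro definition above):

        REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 0)

        /* expands to two 32-bit register descriptors: */
        { .addr = GEN8_RING_CS_GPR(RENDER_RING_BASE, 0) },     /* bits 31:0  */
        { .addr = GEN8_RING_CS_GPR_UDW(RENDER_RING_BASE, 0) }, /* bits 63:32 */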
@@ -604,8 +618,8 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
- REG64(MI_PREDICATE_SRC0),
- REG64(MI_PREDICATE_SRC1),
+ REG64_IDX(MI_PREDICATE_SRC0, RENDER_RING_BASE),
+ REG64_IDX(MI_PREDICATE_SRC1, RENDER_RING_BASE),
REG32(GEN7_3DPRIM_END_OFFSET),
REG32(GEN7_3DPRIM_START_VERTEX),
REG32(GEN7_3DPRIM_VERTEX_COUNT),
@@ -635,22 +649,22 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
};
static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
- REG64_IDX(HSW_CS_GPR, 0),
- REG64_IDX(HSW_CS_GPR, 1),
- REG64_IDX(HSW_CS_GPR, 2),
- REG64_IDX(HSW_CS_GPR, 3),
- REG64_IDX(HSW_CS_GPR, 4),
- REG64_IDX(HSW_CS_GPR, 5),
- REG64_IDX(HSW_CS_GPR, 6),
- REG64_IDX(HSW_CS_GPR, 7),
- REG64_IDX(HSW_CS_GPR, 8),
- REG64_IDX(HSW_CS_GPR, 9),
- REG64_IDX(HSW_CS_GPR, 10),
- REG64_IDX(HSW_CS_GPR, 11),
- REG64_IDX(HSW_CS_GPR, 12),
- REG64_IDX(HSW_CS_GPR, 13),
- REG64_IDX(HSW_CS_GPR, 14),
- REG64_IDX(HSW_CS_GPR, 15),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 0),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 1),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 2),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 3),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 4),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 5),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 6),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 7),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 8),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 9),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 10),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 11),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 12),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 13),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 14),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 15),
REG32(HSW_SCRATCH1,
.mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE,
.value = 0),
@@ -673,22 +687,22 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
REG32(BCS_SWCTRL),
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE),
- REG64_IDX(BCS_GPR, 0),
- REG64_IDX(BCS_GPR, 1),
- REG64_IDX(BCS_GPR, 2),
- REG64_IDX(BCS_GPR, 3),
- REG64_IDX(BCS_GPR, 4),
- REG64_IDX(BCS_GPR, 5),
- REG64_IDX(BCS_GPR, 6),
- REG64_IDX(BCS_GPR, 7),
- REG64_IDX(BCS_GPR, 8),
- REG64_IDX(BCS_GPR, 9),
- REG64_IDX(BCS_GPR, 10),
- REG64_IDX(BCS_GPR, 11),
- REG64_IDX(BCS_GPR, 12),
- REG64_IDX(BCS_GPR, 13),
- REG64_IDX(BCS_GPR, 14),
- REG64_IDX(BCS_GPR, 15),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 0),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 1),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 2),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 3),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 4),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 5),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 6),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 7),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 8),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 9),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 10),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 11),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 12),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 13),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 14),
+ REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 15),
};
#undef REG64
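Swapping HSW_CS_GPR/BCS_GPR for GEN8_RING_CS_GPR in the two tables above is a consolidation, not a behavioural change. A hedged illustration, assuming the usual i915 register definitions (the exact macros live in the gt register headers):

        /* Assumed definitions (verify against intel_gt_regs.h):
         *   #define HSW_CS_GPR(n)             _MMIO(0x2600 + (n) * 8)
         *   #define BCS_GPR(n)                _MMIO(0x22600 + (n) * 8)
         *   #define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8)
         *
         * With RENDER_RING_BASE == 0x2000 and BLT_RING_BASE == 0x22000:
         *   GEN8_RING_CS_GPR(RENDER_RING_BASE, n) == HSW_CS_GPR(n)
         *   GEN8_RING_CS_GPR(BLT_RING_BASE, n)    == BCS_GPR(n)
         * so the whitelisted MMIO addresses are unchanged.
         */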
@@ -939,15 +953,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
* struct intel_engine_cs based on whether the platform requires software
* command parsing.
*/
-void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
+int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
int ret;
- if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
- engine->class == COPY_ENGINE_CLASS))
- return;
+ if (GRAPHICS_VER(engine->i915) != 7 && !(GRAPHICS_VER(engine->i915) == 9 &&
+ engine->class == COPY_ENGINE_CLASS))
+ return 0;
switch (engine->class) {
case RENDER_CLASS:
@@ -976,7 +990,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
break;
case COPY_ENGINE_CLASS:
engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
- if (IS_GEN(engine->i915, 9)) {
+ if (GRAPHICS_VER(engine->i915) == 9) {
cmd_tables = gen9_blt_cmd_table;
cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
engine->get_cmd_length_mask =
@@ -992,7 +1006,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
}
- if (IS_GEN(engine->i915, 9)) {
+ if (GRAPHICS_VER(engine->i915) == 9) {
engine->reg_tables = gen9_blt_reg_tables;
engine->reg_table_count =
ARRAY_SIZE(gen9_blt_reg_tables);
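The IS_GEN()/IS_GEN_RANGE() predicates give way to GRAPHICS_VER() comparisons here, part of a tree-wide rename. Pulling the gating condition from these hunks into one place, the engines that use the software parser are (a sketch; this helper is hypothetical, not part of the patch):

        static bool uses_sw_cmd_parser(const struct intel_engine_cs *engine)
        {
                /* All Gen7 engines are parsed in software ... */
                if (GRAPHICS_VER(engine->i915) == 7)
                        return true;

                /* ... and on Gen9 only the blitter retains the parser */
                return GRAPHICS_VER(engine->i915) == 9 &&
                       engine->class == COPY_ENGINE_CLASS;
        }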
@@ -1012,19 +1026,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
break;
default:
MISSING_CASE(engine->class);
- return;
+ goto out;
}
if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
drm_err(&engine->i915->drm,
"%s: command descriptions are not sorted\n",
engine->name);
- return;
+ goto out;
}
if (!validate_regs_sorted(engine)) {
drm_err(&engine->i915->drm,
"%s: registers are not sorted\n", engine->name);
- return;
+ goto out;
}
ret = init_hash_table(engine, cmd_tables, cmd_table_count);
@@ -1032,10 +1046,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
drm_err(&engine->i915->drm,
"%s: initialisation failed!\n", engine->name);
fini_hash_table(engine);
- return;
+ goto out;
}
engine->flags |= I915_ENGINE_USING_CMD_PARSER;
+
+out:
+ if (intel_engine_requires_cmd_parser(engine) &&
+ !intel_engine_using_cmd_parser(engine))
+ return -EINVAL;
+
+ return 0;
}
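With the void-to-int conversion, a parser that is required for isolation but fails table validation now propagates -EINVAL instead of leaving the engine silently unprotected. A minimal sketch of the caller side (the real call sits in the engine setup path; the surrounding error handling here is illustrative):

        err = intel_engine_init_cmd_parser(engine);
        if (err) {
                /* e.g. -EINVAL: parser required but its tables failed
                 * validation; abort engine setup rather than run unparsed.
                 */
                return err;
        }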
/**
@@ -1136,27 +1157,31 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
- u32 offset, u32 length)
+ unsigned long offset, unsigned long length,
+ bool *needs_clflush_after)
{
- bool needs_clflush;
+ unsigned int src_needs_clflush;
+ unsigned int dst_needs_clflush;
void *dst, *src;
int ret;
- dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
+ ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
+ if (ret)
+ return ERR_PTR(ret);
+
+ dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
+ i915_gem_object_finish_access(dst_obj);
if (IS_ERR(dst))
return dst;
- ret = i915_gem_object_pin_pages(src_obj);
+ ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
if (ret) {
i915_gem_object_unpin_map(dst_obj);
return ERR_PTR(ret);
}
- needs_clflush =
- !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
src = ERR_PTR(-ENODEV);
- if (needs_clflush && i915_has_memcpy_from_wc()) {
+ if (src_needs_clflush && i915_has_memcpy_from_wc()) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
i915_unaligned_memcpy_from_wc(dst,
@@ -1166,8 +1191,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
}
}
if (IS_ERR(src)) {
+ unsigned long x, n, remain;
void *ptr;
- int x, n;
/*
* We can avoid clflushing partial cachelines before the write
@@ -1177,33 +1202,44 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
* We don't care about copying too much here as we only
* validate up to the end of the batch.
*/
- if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
- length = round_up(length,
+ remain = length;
+ if (dst_needs_clflush & CLFLUSH_BEFORE)
+ remain = round_up(remain,
boot_cpu_data.x86_clflush_size);
ptr = dst;
x = offset_in_page(offset);
- for (n = offset >> PAGE_SHIFT; length; n++) {
- int len = min_t(int, length, PAGE_SIZE - x);
+ for (n = offset >> PAGE_SHIFT; remain; n++) {
+ int len = min(remain, PAGE_SIZE - x);
- src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
- if (needs_clflush)
+ src = kmap_local_page(i915_gem_object_get_page(src_obj, n));
+ if (src_needs_clflush)
drm_clflush_virt_range(src + x, len);
memcpy(ptr, src + x, len);
- kunmap_atomic(src);
+ kunmap_local(src);
ptr += len;
- length -= len;
+ remain -= len;
x = 0;
}
}
- i915_gem_object_unpin_pages(src_obj);
+ i915_gem_object_finish_access(src_obj);
+
+ memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
/* dst_obj is returned with vmap pinned */
+ *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
+
return dst;
}
+static inline bool cmd_desc_is(const struct drm_i915_cmd_descriptor * const desc,
+ const u32 cmd)
+{
+ return desc->cmd.value == (cmd & desc->cmd.mask);
+}
+
static bool check_cmd(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_descriptor *desc,
const u32 *cmd, u32 length)
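copy_batch() above now brackets both objects with the GEM prepare/finish-access helpers, which report the cache flushes the caller owes (CLFLUSH_BEFORE/CLFLUSH_AFTER), and the per-page fallback copy moves from kmap_atomic() to kmap_local_page(), which keeps the mapping CPU-local without disabling preemption. A condensed sketch of the read side, with names as in the patch:

        unsigned long x = offset_in_page(offset), n, remain = length;
        unsigned int src_needs_clflush;
        void *ptr = dst;
        int err;

        err = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
        if (err)
                return ERR_PTR(err);

        for (n = offset >> PAGE_SHIFT; remain; n++) {
                int len = min(remain, PAGE_SIZE - x);
                void *src = kmap_local_page(i915_gem_object_get_page(src_obj, n));

                if (src_needs_clflush)  /* source not CPU-cache coherent */
                        drm_clflush_virt_range(src + x, len);
                memcpy(ptr, src + x, len);
                kunmap_local(src);      /* release before mapping the next page */

                ptr += len;
                remain -= len;
                x = 0;                  /* pages after the first start at 0 */
        }

        i915_gem_object_finish_access(src_obj);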
@@ -1242,19 +1278,19 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* allowed mask/value pair given in the whitelist entry.
*/
if (reg->mask) {
- if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+ if (cmd_desc_is(desc, MI_LOAD_REGISTER_MEM)) {
DRM_DEBUG("CMD: Rejected LRM to masked register 0x%08X\n",
reg_addr);
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_REG) {
+ if (cmd_desc_is(desc, MI_LOAD_REGISTER_REG)) {
DRM_DEBUG("CMD: Rejected LRR to masked register 0x%08X\n",
reg_addr);
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (cmd_desc_is(desc, MI_LOAD_REGISTER_IMM(1)) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG("CMD: Rejected LRI to masked register 0x%08X\n",
@@ -1386,11 +1422,6 @@ static unsigned long *alloc_whitelist(u32 batch_length)
#define LENGTH_BIAS 2
-static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
-{
- return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
-}
-
/**
* intel_engine_cmd_parser() - parse a batch buffer for privilege violations
* @engine: the engine on which the batch is to execute
@@ -1398,7 +1429,7 @@ static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
* @batch_offset: byte offset in the batch at which execution starts
* @batch_length: length of the commands in batch_obj
* @shadow: validated copy of the batch buffer in question
- * @trampoline: whether to emit a conditional trampoline at the end of the batch
+ * @trampoline: true if we need to trampoline into privileged execution
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@@ -1406,16 +1437,18 @@ static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
+
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct i915_vma *batch,
- u32 batch_offset,
- u32 batch_length,
+ unsigned long batch_offset,
+ unsigned long batch_length,
struct i915_vma *shadow,
bool trampoline)
{
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
+ bool needs_clflush_after = false;
unsigned long *jump_whitelist;
u64 batch_addr, shadow_addr;
int ret = 0;
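For context, a hedged sketch of how the execbuffer path might invoke the parser with the widened unsigned long offset and length (the real call site lives in i915_gem_execbuffer.c; the eb field names are assumptions for illustration):

        err = intel_engine_cmd_parser(eb->engine, eb->batch,
                                      eb->batch_start_offset, eb->batch_len,
                                      shadow, trampoline);
        if (err == -EACCES) {
                /* Batch looks legal but must use hardware parsing instead */
        } else if (err) {
                /* Privilege violation or copy failure: reject the execbuf */
        }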
@@ -1426,7 +1459,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
batch->size));
GEM_BUG_ON(!batch_length);
- cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
+ cmd = copy_batch(shadow->obj, batch->obj,
+ batch_offset, batch_length,
+ &needs_clflush_after);
if (IS_ERR(cmd)) {
DRM_DEBUG("CMD: Failed to copy batch\n");
return PTR_ERR(cmd);
@@ -1437,8 +1472,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
/* Defer failure until attempted use */
jump_whitelist = alloc_whitelist(batch_length);
- shadow_addr = gen8_canonical_addr(shadow->node.start);
- batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
+ shadow_addr = gen8_canonical_addr(i915_vma_offset(shadow));
+ batch_addr = gen8_canonical_addr(i915_vma_offset(batch) + batch_offset);
/*
* We use the batch length as size because the shadow object is as
@@ -1478,7 +1513,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
break;
}
- if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+ if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) {
ret = check_bbstart(cmd, offset, length, batch_length,
batch_addr, shadow_addr,
jump_whitelist);
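cmd_desc_is(), used here and in the LRM/LRR/LRI checks earlier, compares under the descriptor's mask rather than testing desc->cmd.value for raw equality. That matters because MI command dwords carry per-instance fields (length, flags) in their low bits. Illustratively, assuming the conventional MI_INSTR() encoding with the opcode in bits 28:23:

        /* MI_LOAD_REGISTER_IMM(n) == MI_INSTR(0x22, 2 * (n) - 1), so:
         *   MI_LOAD_REGISTER_IMM(1) == 0x11000001
         *   MI_LOAD_REGISTER_IMM(2) == 0x11000003
         * A raw equality test registered against one form misses the other;
         * masking first compares only the opcode bits the descriptor fixes:
         */
        return desc->cmd.value == (cmd & desc->cmd.mask);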
@@ -1525,7 +1560,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
if (IS_HASWELL(engine->i915))
flags = MI_BATCH_NON_SECURE_HSW;
- GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7));
+ GEM_BUG_ON(!IS_GRAPHICS_VER(engine->i915, 6, 7));
__gen6_emit_bb_start(batch_end,
batch_addr,
flags);
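Both shadow_addr and batch_addr were put into canonical form by gen8_canonical_addr() earlier, now computed from i915_vma_offset() instead of reading node.start directly. Canonical form sign-extends bit 47 upward, roughly (a sketch, assuming the sign_extend64() helper from <linux/bitops.h>):

        static inline u64 canonical_addr(u64 address)
        {
                /* bits [63:48] must replicate bit 47 for the command streamer */
                return sign_extend64(address, 47);
        }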
@@ -1533,16 +1568,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
ret = 0; /* allow execution */
}
}
-
- if (shadow_needs_clflush(shadow->obj))
- drm_clflush_virt_range(batch_end, 8);
}
- if (shadow_needs_clflush(shadow->obj)) {
- void *ptr = page_mask_bits(shadow->obj->mm.mapping);
-
- drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
- }
+ i915_gem_object_flush_map(shadow->obj);
if (!IS_ERR_OR_NULL(jump_whitelist))
kfree(jump_whitelist);
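The two open-coded drm_clflush_virt_range() calls, along with the shadow_needs_clflush() helper removed earlier, collapse into one i915_gem_object_flush_map(), which flushes the object's kernel vmapping when the object is not CPU-cache coherent and is a no-op otherwise. Typical usage, sketched:

        cmd = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
        /* ... CPU writes the validated commands through the WB map ... */
        i915_gem_object_flush_map(shadow->obj);  /* flush only if needed */
        i915_gem_object_unpin_map(shadow->obj);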