Diffstat (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 200 ++++++++------
1 file changed, 114 insertions(+), 86 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 372354d33f55..7654f1be8d3b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -25,10 +25,20 @@
  *
  */
 
+#include <linux/highmem.h>
+
+#include <drm/drm_cache.h>
+#include <drm/drm_print.h>
+
 #include "gt/intel_engine.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "i915_cmd_parser.h"
 #include "i915_drv.h"
 #include "i915_memcpy.h"
+#include "i915_reg.h"
 
 /**
  * DOC: batch buffer command parser
@@ -590,6 +600,10 @@ struct drm_i915_reg_descriptor {
 	{ .addr = _reg(idx) }, \
 	{ .addr = _reg ## _UDW(idx) }
 
+#define REG64_BASE_IDX(_reg, base, idx) \
+	{ .addr = _reg(base, idx) }, \
+	{ .addr = _reg ## _UDW(base, idx) }
+
 static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
 	REG64(GPGPU_THREADS_DISPATCHED),
 	REG64(HS_INVOCATION_COUNT),
@@ -604,8 +618,8 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
 	REG64(PS_INVOCATION_COUNT),
 	REG64(PS_DEPTH_COUNT),
 	REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
-	REG64(MI_PREDICATE_SRC0),
-	REG64(MI_PREDICATE_SRC1),
+	REG64_IDX(MI_PREDICATE_SRC0, RENDER_RING_BASE),
+	REG64_IDX(MI_PREDICATE_SRC1, RENDER_RING_BASE),
 	REG32(GEN7_3DPRIM_END_OFFSET),
 	REG32(GEN7_3DPRIM_START_VERTEX),
 	REG32(GEN7_3DPRIM_VERTEX_COUNT),
@@ -635,22 +649,22 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
 };
 
 static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
-	REG64_IDX(HSW_CS_GPR, 0),
-	REG64_IDX(HSW_CS_GPR, 1),
-	REG64_IDX(HSW_CS_GPR, 2),
-	REG64_IDX(HSW_CS_GPR, 3),
-	REG64_IDX(HSW_CS_GPR, 4),
-	REG64_IDX(HSW_CS_GPR, 5),
-	REG64_IDX(HSW_CS_GPR, 6),
-	REG64_IDX(HSW_CS_GPR, 7),
-	REG64_IDX(HSW_CS_GPR, 8),
-	REG64_IDX(HSW_CS_GPR, 9),
-	REG64_IDX(HSW_CS_GPR, 10),
-	REG64_IDX(HSW_CS_GPR, 11),
-	REG64_IDX(HSW_CS_GPR, 12),
-	REG64_IDX(HSW_CS_GPR, 13),
-	REG64_IDX(HSW_CS_GPR, 14),
-	REG64_IDX(HSW_CS_GPR, 15),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 0),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 1),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 2),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 3),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 4),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 5),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 6),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 7),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 8),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 9),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 10),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 11),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 12),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 13),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 14),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, RENDER_RING_BASE, 15),
 	REG32(HSW_SCRATCH1,
 	      .mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE,
 	      .value = 0),
@@ -673,22 +687,22 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
 	REG32(BCS_SWCTRL),
 	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
 	REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE),
-	REG64_IDX(BCS_GPR, 0),
-	REG64_IDX(BCS_GPR, 1),
-	REG64_IDX(BCS_GPR, 2),
-	REG64_IDX(BCS_GPR, 3),
-	REG64_IDX(BCS_GPR, 4),
-	REG64_IDX(BCS_GPR, 5),
-	REG64_IDX(BCS_GPR, 6),
-	REG64_IDX(BCS_GPR, 7),
-	REG64_IDX(BCS_GPR, 8),
-	REG64_IDX(BCS_GPR, 9),
-	REG64_IDX(BCS_GPR, 10),
-	REG64_IDX(BCS_GPR, 11),
-	REG64_IDX(BCS_GPR, 12),
-	REG64_IDX(BCS_GPR, 13),
-	REG64_IDX(BCS_GPR, 14),
-	REG64_IDX(BCS_GPR, 15),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 0),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 1),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 2),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 3),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 4),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 5),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 6),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 7),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 8),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 9),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 10),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 11),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 12),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 13),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 14),
+	REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 15),
 };
 
 #undef REG64
@@ -939,15 +953,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
  * struct intel_engine_cs based on whether the platform requires software
  * command parsing.
  */
-void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
+int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 {
 	const struct drm_i915_cmd_table *cmd_tables;
 	int cmd_table_count;
 	int ret;
 
-	if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
-					  engine->class == COPY_ENGINE_CLASS))
-		return;
+	if (GRAPHICS_VER(engine->i915) != 7 && !(GRAPHICS_VER(engine->i915) == 9 &&
+						 engine->class == COPY_ENGINE_CLASS))
+		return 0;
 
 	switch (engine->class) {
 	case RENDER_CLASS:
@@ -976,7 +990,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 		break;
 	case COPY_ENGINE_CLASS:
 		engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
-		if (IS_GEN(engine->i915, 9)) {
+		if (GRAPHICS_VER(engine->i915) == 9) {
 			cmd_tables = gen9_blt_cmd_table;
 			cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
 			engine->get_cmd_length_mask =
@@ -992,7 +1006,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 			cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
 		}
 
-		if (IS_GEN(engine->i915, 9)) {
+		if (GRAPHICS_VER(engine->i915) == 9) {
 			engine->reg_tables = gen9_blt_reg_tables;
 			engine->reg_table_count =
 				ARRAY_SIZE(gen9_blt_reg_tables);
@@ -1012,19 +1026,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 		break;
 	default:
 		MISSING_CASE(engine->class);
-		return;
+		goto out;
 	}
 
 	if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
 		drm_err(&engine->i915->drm,
 			"%s: command descriptions are not sorted\n",
 			engine->name);
-		return;
+		goto out;
 	}
 	if (!validate_regs_sorted(engine)) {
 		drm_err(&engine->i915->drm,
 			"%s: registers are not sorted\n", engine->name);
-		return;
+		goto out;
 	}
 
 	ret = init_hash_table(engine, cmd_tables, cmd_table_count);
@@ -1032,10 +1046,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 		drm_err(&engine->i915->drm,
 			"%s: initialised failed!\n", engine->name);
 		fini_hash_table(engine);
-		return;
+		goto out;
 	}
 
 	engine->flags |= I915_ENGINE_USING_CMD_PARSER;
+
+out:
+	if (intel_engine_requires_cmd_parser(engine) &&
+	    !intel_engine_using_cmd_parser(engine))
+		return -EINVAL;
+
+	return 0;
 }
 
 /**
@@ -1136,27 +1157,31 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
 /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		       struct drm_i915_gem_object *src_obj,
-		       u32 offset, u32 length)
+		       unsigned long offset, unsigned long length,
+		       bool *needs_clflush_after)
 {
-	bool needs_clflush;
+	unsigned int src_needs_clflush;
+	unsigned int dst_needs_clflush;
 	void *dst, *src;
 	int ret;
 
-	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
+	if (ret)
+		return ERR_PTR(ret);
+
+	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
+	i915_gem_object_finish_access(dst_obj);
 	if (IS_ERR(dst))
 		return dst;
 
-	ret = i915_gem_object_pin_pages(src_obj);
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
 	if (ret) {
 		i915_gem_object_unpin_map(dst_obj);
 		return ERR_PTR(ret);
 	}
 
-	needs_clflush =
-		!(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
 	src = ERR_PTR(-ENODEV);
-	if (needs_clflush && i915_has_memcpy_from_wc()) {
+	if (src_needs_clflush && i915_has_memcpy_from_wc()) {
 		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
 		if (!IS_ERR(src)) {
 			i915_unaligned_memcpy_from_wc(dst,
@@ -1166,8 +1191,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		}
 	}
 	if (IS_ERR(src)) {
+		unsigned long x, n, remain;
 		void *ptr;
-		int x, n;
 
 		/*
 		 * We can avoid clflushing partial cachelines before the write
@@ -1177,33 +1202,44 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		 * We don't care about copying too much here as we only
 		 * validate up to the end of the batch.
 		 */
-		if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
-			length = round_up(length,
+		remain = length;
+		if (dst_needs_clflush & CLFLUSH_BEFORE)
+			remain = round_up(remain,
					  boot_cpu_data.x86_clflush_size);
 
 		ptr = dst;
 		x = offset_in_page(offset);
-		for (n = offset >> PAGE_SHIFT; length; n++) {
-			int len = min_t(int, length, PAGE_SIZE - x);
+		for (n = offset >> PAGE_SHIFT; remain; n++) {
+			int len = min(remain, PAGE_SIZE - x);
 
-			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
-			if (needs_clflush)
+			src = kmap_local_page(i915_gem_object_get_page(src_obj, n));
+			if (src_needs_clflush)
 				drm_clflush_virt_range(src + x, len);
 			memcpy(ptr, src + x, len);
-			kunmap_atomic(src);
+			kunmap_local(src);
 
 			ptr += len;
-			length -= len;
+			remain -= len;
 			x = 0;
 		}
 	}
 
-	i915_gem_object_unpin_pages(src_obj);
+	i915_gem_object_finish_access(src_obj);
+
+	memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
 
 	/* dst_obj is returned with vmap pinned */
+	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
+
 	return dst;
 }
 
+static inline bool cmd_desc_is(const struct drm_i915_cmd_descriptor * const desc,
+			       const u32 cmd)
+{
+	return desc->cmd.value == (cmd & desc->cmd.mask);
+}
+
 static bool check_cmd(const struct intel_engine_cs *engine,
 		      const struct drm_i915_cmd_descriptor *desc,
 		      const u32 *cmd, u32 length)
@@ -1242,19 +1278,19 @@ static bool check_cmd(const struct intel_engine_cs *engine,
			 * allowed mask/value pair given in the whitelist entry.
			 */
			if (reg->mask) {
-				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+				if (cmd_desc_is(desc, MI_LOAD_REGISTER_MEM)) {
					DRM_DEBUG("CMD: Rejected LRM to masked register 0x%08X\n",
						  reg_addr);
					return false;
				}
 
-				if (desc->cmd.value == MI_LOAD_REGISTER_REG) {
+				if (cmd_desc_is(desc, MI_LOAD_REGISTER_REG)) {
					DRM_DEBUG("CMD: Rejected LRR to masked register 0x%08X\n",
						  reg_addr);
					return false;
				}
 
-				if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+				if (cmd_desc_is(desc, MI_LOAD_REGISTER_IMM(1)) &&
				    (offset + 2 > length ||
				     (cmd[offset + 1] & reg->mask) != reg->value)) {
					DRM_DEBUG("CMD: Rejected LRI to masked register 0x%08X\n",
@@ -1386,11 +1422,6 @@ static unsigned long *alloc_whitelist(u32 batch_length)
 
 #define LENGTH_BIAS 2
 
-static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
-}
-
 /**
  * intel_engine_cmd_parser() - parse a batch buffer for privilege violations
  * @engine: the engine on which the batch is to execute
@@ -1398,7 +1429,7 @@ static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
  * @batch_offset: byte offset in the batch at which execution starts
  * @batch_length: length of the commands in batch_obj
  * @shadow: validated copy of the batch buffer in question
- * @trampoline: whether to emit a conditional trampoline at the end of the batch
+ * @trampoline: true if we need to trampoline into privileged execution
  *
 * Parses the specified batch buffer looking for privilege violations as
 * described in the overview.
@@ -1406,16 +1437,18 @@ static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
 * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
 * if the batch appears legal but should use hardware parsing
 */
+
 int intel_engine_cmd_parser(struct intel_engine_cs *engine,
			    struct i915_vma *batch,
-			    u32 batch_offset,
-			    u32 batch_length,
+			    unsigned long batch_offset,
+			    unsigned long batch_length,
			    struct i915_vma *shadow,
			    bool trampoline)
 {
	u32 *cmd, *batch_end, offset = 0;
	struct drm_i915_cmd_descriptor default_desc = noop_desc;
	const struct drm_i915_cmd_descriptor *desc = &default_desc;
+	bool needs_clflush_after = false;
	unsigned long *jump_whitelist;
	u64 batch_addr, shadow_addr;
	int ret = 0;
@@ -1426,7 +1459,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
				     batch->size));
	GEM_BUG_ON(!batch_length);
 
-	cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
+	cmd = copy_batch(shadow->obj, batch->obj,
+			 batch_offset, batch_length,
+			 &needs_clflush_after);
	if (IS_ERR(cmd)) {
		DRM_DEBUG("CMD: Failed to copy batch\n");
		return PTR_ERR(cmd);
@@ -1437,8 +1472,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
	/* Defer failure until attempted use */
	jump_whitelist = alloc_whitelist(batch_length);
 
-	shadow_addr = gen8_canonical_addr(shadow->node.start);
-	batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
+	shadow_addr = gen8_canonical_addr(i915_vma_offset(shadow));
+	batch_addr = gen8_canonical_addr(i915_vma_offset(batch) + batch_offset);
 
	/*
	 * We use the batch length as size because the shadow object is as
@@ -1478,7 +1513,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
			break;
		}
 
-		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+		if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) {
			ret = check_bbstart(cmd, offset, length, batch_length,
					    batch_addr, shadow_addr,
					    jump_whitelist);
@@ -1525,7 +1560,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
			if (IS_HASWELL(engine->i915))
				flags = MI_BATCH_NON_SECURE_HSW;
 
-			GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7));
+			GEM_BUG_ON(!IS_GRAPHICS_VER(engine->i915, 6, 7));
			__gen6_emit_bb_start(batch_end,
					     batch_addr,
					     flags);
@@ -1533,16 +1568,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
				ret = 0; /* allow execution */
			}
		}
-
-		if (shadow_needs_clflush(shadow->obj))
-			drm_clflush_virt_range(batch_end, 8);
	}
 
-	if (shadow_needs_clflush(shadow->obj)) {
-		void *ptr = page_mask_bits(shadow->obj->mm.mapping);
-
-		drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
-	}
+	i915_gem_object_flush_map(shadow->obj);
 
	if (!IS_ERR_OR_NULL(jump_whitelist))
		kfree(jump_whitelist);
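
Two patterns in this diff are worth unpacking. First, the register-whitelist hunks replace the per-engine HSW_CS_GPR and BCS_GPR wrappers with a single GEN8_RING_CS_GPR(base, idx) lookup, and the new REG64_BASE_IDX() macro threads the ring base through to both halves of a 64-bit register. Below is a minimal standalone C sketch of that expansion; the ring bases, the 0x600 GPR offset, and the simplified types are illustrative stand-ins for the i915 definitions, not the authoritative values.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the i915 MMIO helpers and ring bases. */
#define RENDER_RING_BASE 0x02000u
#define BLT_RING_BASE    0x22000u

/* A GPR pair lives at (ring base + 0x600 + 8 * n); offsets are illustrative. */
#define GEN8_RING_CS_GPR(base, n)     ((base) + 0x600u + (n) * 8u)
#define GEN8_RING_CS_GPR_UDW(base, n) ((base) + 0x600u + (n) * 8u + 4u)

struct reg_descriptor {
        uint32_t addr;
};

/* Same shape as the REG64_BASE_IDX() added in the diff: one table entry for
 * the low dword of the register and one for the upper dword (_UDW). */
#define REG64_BASE_IDX(_reg, base, idx) \
        { .addr = _reg(base, idx) }, \
        { .addr = _reg ## _UDW(base, idx) }

/* One macro now describes the GPR file of any engine, render or blitter. */
static const struct reg_descriptor blt_regs[] = {
        REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 0),
        REG64_BASE_IDX(GEN8_RING_CS_GPR, BLT_RING_BASE, 1),
};

int main(void)
{
        unsigned int i;

        for (i = 0; i < sizeof(blt_regs) / sizeof(blt_regs[0]); i++)
                printf("whitelisted address: 0x%05x\n",
                       (unsigned int)blt_regs[i].addr);
        return 0;
}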
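
Second, the check_cmd() and parser hunks swap direct comparisons against desc->cmd.value for the new cmd_desc_is() helper, which masks the incoming command dword before testing it against the descriptor's canonical value. A standalone sketch of that comparison rule, using a made-up encoding rather than the real MI_* values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct drm_i915_cmd_descriptor's cmd field. */
struct cmd_descriptor {
        uint32_t value; /* canonical opcode bits for this command */
        uint32_t mask;  /* which bits of a dword encode the opcode */
};

/* Same comparison rule as the cmd_desc_is() helper added in the diff. */
static bool cmd_desc_is(const struct cmd_descriptor *desc, uint32_t cmd)
{
        return desc->value == (cmd & desc->mask);
}

int main(void)
{
        /* Illustrative encoding: opcode in the high bits, a length field low. */
        const struct cmd_descriptor desc = { .value = 0x11000000u,
                                             .mask  = 0xffff0000u };
        uint32_t dword = 0x11000001u; /* same opcode, nonzero length field */

        /* Raw equality fails whenever any operand bits are set... */
        printf("raw compare:    %s\n", dword == desc.value ? "match" : "no match");

        /* ...while masking out the non-opcode bits classifies it correctly. */
        printf("masked compare: %s\n", cmd_desc_is(&desc, dword) ? "match" : "no match");
        return 0;
}

The masked form matches any dword whose opcode bits equal the descriptor's canonical value, regardless of operand bits such as a dword-length field.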
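
Finally, copy_batch()'s rewritten bounce loop walks the source object page by page: the first chunk starts mid-page at offset_in_page(offset), and every later page is copied from its start. A userspace analogue of just that loop, assuming 4 KiB pages and eliding the kmap_local_page()/drm_clflush_virt_range() steps that only make sense in the kernel:

#include <stdio.h>
#include <string.h>

/* The kernel walks the source one page at a time; 4 KiB is assumed here. */
#define PAGE_SIZE 4096ul

static void copy_chunked(unsigned char *dst, const unsigned char *src,
                         unsigned long offset, unsigned long length)
{
        unsigned char *ptr = dst;
        unsigned long x = offset % PAGE_SIZE;   /* offset_in_page(offset) */
        unsigned long n = offset / PAGE_SIZE;   /* first source page index */
        unsigned long remain = length;

        while (remain) {
                /* Copy at most up to the end of the current page. */
                unsigned long len = remain < PAGE_SIZE - x ? remain
                                                           : PAGE_SIZE - x;

                /* In the kernel: kmap_local_page(page n), optional clflush. */
                memcpy(ptr, src + n * PAGE_SIZE + x, len);

                ptr += len;
                remain -= len;
                x = 0;  /* every page after the first starts at offset 0 */
                n++;
        }
}

int main(void)
{
        static unsigned char src[3 * PAGE_SIZE], dst[2 * PAGE_SIZE];

        memset(src, 0xab, sizeof(src));
        /* Start mid-page and span a page boundary, as a batch offset can. */
        copy_chunked(dst, src, PAGE_SIZE / 2, PAGE_SIZE + 16);
        printf("copied byte: 0x%02x\n", dst[0]);
        return 0;
}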
