diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/cmd_parser.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gvt/cmd_parser.c | 1133 |
1 files changed, 782 insertions, 351 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 713848c36349..df04e4ead8ea 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -35,11 +35,30 @@ */ #include <linux/slab.h> + +#include <drm/drm_print.h> + #include "i915_drv.h" +#include "i915_reg.h" +#include "display/intel_display_regs.h" +#include "gt/intel_engine_regs.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc.h" +#include "gt/intel_ring.h" +#include "gt/intel_gt_requests.h" +#include "gt/shmem_utils.h" #include "gvt.h" #include "i915_pvinfo.h" #include "trace.h" +#include "display/i9xx_plane_regs.h" +#include "display/intel_display_core.h" +#include "display/intel_sprite_regs.h" +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + #define INVALID_OP (~0U) #define OP_LEN_MI 9 @@ -55,10 +74,10 @@ struct sub_op_bits { int low; }; struct decode_info { - char *name; + const char *name; int op_len; int nr_sub_op; - struct sub_op_bits *sub_op; + const struct sub_op_bits *sub_op; }; #define MAX_CMD_BUDGET 0x7fffffff @@ -162,6 +181,7 @@ struct decode_info { #define OP_STATE_BASE_ADDRESS OP_3D_MEDIA(0x0, 0x1, 0x01) #define OP_STATE_SIP OP_3D_MEDIA(0x0, 0x1, 0x02) #define OP_3D_MEDIA_0_1_4 OP_3D_MEDIA(0x0, 0x1, 0x04) +#define OP_SWTESS_BASE_ADDRESS OP_3D_MEDIA(0x0, 0x1, 0x03) #define OP_3DSTATE_VF_STATISTICS_GM45 OP_3D_MEDIA(0x1, 0x0, 0x0B) @@ -172,6 +192,7 @@ struct decode_info { #define OP_MEDIA_INTERFACE_DESCRIPTOR_LOAD OP_3D_MEDIA(0x2, 0x0, 0x2) #define OP_MEDIA_GATEWAY_STATE OP_3D_MEDIA(0x2, 0x0, 0x3) #define OP_MEDIA_STATE_FLUSH OP_3D_MEDIA(0x2, 0x0, 0x4) +#define OP_MEDIA_POOL_STATE OP_3D_MEDIA(0x2, 0x0, 0x5) #define OP_MEDIA_OBJECT OP_3D_MEDIA(0x2, 0x1, 0x0) #define OP_MEDIA_OBJECT_PRT OP_3D_MEDIA(0x2, 0x1, 0x2) @@ -373,35 +394,51 @@ typedef int (*parser_cmd_handler)(struct parser_exec_state *s); #define ADDR_FIX_4(x1, x2, x3, x4) (ADDR_FIX_1(x1) | ADDR_FIX_3(x2, x3, x4)) #define ADDR_FIX_5(x1, x2, x3, x4, x5) (ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5)) +#define DWORD_FIELD(dword, end, start) \ + FIELD_GET(GENMASK(end, start), cmd_val(s, dword)) + +#define OP_LENGTH_BIAS 2 +#define CMD_LEN(value) (value + OP_LENGTH_BIAS) + +static int gvt_check_valid_cmd_length(int len, int valid_len) +{ + if (valid_len != len) { + gvt_err("len is not valid: len=%u valid_len=%u\n", + len, valid_len); + return -EFAULT; + } + return 0; +} + struct cmd_info { - char *name; + const char *name; u32 opcode; -#define F_LEN_MASK (1U<<0) +#define F_LEN_MASK 3U #define F_LEN_CONST 1U #define F_LEN_VAR 0U +/* value is const although LEN maybe variable */ +#define F_LEN_VAR_FIXED (1<<1) /* * command has its own ip advance logic * e.g. MI_BATCH_START, MI_BATCH_END */ -#define F_IP_ADVANCE_CUSTOM (1<<1) - -#define F_POST_HANDLE (1<<2) +#define F_IP_ADVANCE_CUSTOM (1<<2) u32 flag; -#define R_RCS (1 << RCS) -#define R_VCS1 (1 << VCS) -#define R_VCS2 (1 << VCS2) +#define R_RCS BIT(RCS0) +#define R_VCS1 BIT(VCS0) +#define R_VCS2 BIT(VCS1) #define R_VCS (R_VCS1 | R_VCS2) -#define R_BCS (1 << BCS) -#define R_VECS (1 << VECS) +#define R_BCS BIT(BCS0) +#define R_VECS BIT(VECS0) #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS) /* rings that support this cmd: BLT/RCS/VCS/VECS */ - uint16_t rings; + intel_engine_mask_t rings; /* devices that support this cmd: SNB/IVB/HSW/... */ - uint16_t devices; + u16 devices; /* which DWords are address that need fix up. * bit 0 means a 32-bit non address operand in command @@ -411,26 +448,30 @@ struct cmd_info { * No matter the address length, each address only takes * one bit in the bitmap. */ - uint16_t addr_bitmap; + u16 addr_bitmap; /* flag == F_LEN_CONST : command length * flag == F_LEN_VAR : length bias bits * Note: length is in DWord */ - uint8_t len; + u32 len; parser_cmd_handler handler; + + /* valid length in DWord */ + u32 valid_len; }; struct cmd_entry { struct hlist_node hlist; - struct cmd_info *info; + const struct cmd_info *info; }; enum { RING_BUFFER_INSTRUCTION, BATCH_BUFFER_INSTRUCTION, BATCH_BUFFER_2ND_LEVEL, + RING_BUFFER_CTX, }; enum { @@ -440,7 +481,7 @@ enum { struct parser_exec_state { struct intel_vgpu *vgpu; - int ring_id; + const struct intel_engine_cs *engine; int buf_type; @@ -471,8 +512,10 @@ struct parser_exec_state { * used when ret from 2nd level batch buffer */ int saved_buf_addr_type; + bool is_ctx_wa; + bool is_init_ctx; - struct cmd_info *info; + const struct cmd_info *info; struct intel_vgpu_workload *workload; }; @@ -483,12 +526,12 @@ struct parser_exec_state { static unsigned long bypass_scan_mask = 0; /* ring ALL, type = 0 */ -static struct sub_op_bits sub_op_mi[] = { +static const struct sub_op_bits sub_op_mi[] = { {31, 29}, {28, 23}, }; -static struct decode_info decode_info_mi = { +static const struct decode_info decode_info_mi = { "MI", OP_LEN_MI, ARRAY_SIZE(sub_op_mi), @@ -496,12 +539,12 @@ static struct decode_info decode_info_mi = { }; /* ring RCS, command type 2 */ -static struct sub_op_bits sub_op_2d[] = { +static const struct sub_op_bits sub_op_2d[] = { {31, 29}, {28, 22}, }; -static struct decode_info decode_info_2d = { +static const struct decode_info decode_info_2d = { "2D", OP_LEN_2D, ARRAY_SIZE(sub_op_2d), @@ -509,14 +552,14 @@ static struct decode_info decode_info_2d = { }; /* ring RCS, command type 3 */ -static struct sub_op_bits sub_op_3d_media[] = { +static const struct sub_op_bits sub_op_3d_media[] = { {31, 29}, {28, 27}, {26, 24}, {23, 16}, }; -static struct decode_info decode_info_3d_media = { +static const struct decode_info decode_info_3d_media = { "3D_Media", OP_LEN_3D_MEDIA, ARRAY_SIZE(sub_op_3d_media), @@ -524,7 +567,7 @@ static struct decode_info decode_info_3d_media = { }; /* ring VCS, command type 3 */ -static struct sub_op_bits sub_op_mfx_vc[] = { +static const struct sub_op_bits sub_op_mfx_vc[] = { {31, 29}, {28, 27}, {26, 24}, @@ -532,7 +575,7 @@ static struct sub_op_bits sub_op_mfx_vc[] = { {20, 16}, }; -static struct decode_info decode_info_mfx_vc = { +static const struct decode_info decode_info_mfx_vc = { "MFX_VC", OP_LEN_MFX_VC, ARRAY_SIZE(sub_op_mfx_vc), @@ -540,7 +583,7 @@ static struct decode_info decode_info_mfx_vc = { }; /* ring VECS, command type 3 */ -static struct sub_op_bits sub_op_vebox[] = { +static const struct sub_op_bits sub_op_vebox[] = { {31, 29}, {28, 27}, {26, 24}, @@ -548,15 +591,15 @@ static struct sub_op_bits sub_op_vebox[] = { {20, 16}, }; -static struct decode_info decode_info_vebox = { +static const struct decode_info decode_info_vebox = { "VEBOX", OP_LEN_VEBOX, ARRAY_SIZE(sub_op_vebox), sub_op_vebox, }; -static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { - [RCS] = { +static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { + [RCS0] = { &decode_info_mi, NULL, NULL, @@ -567,7 +610,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VCS] = { + [VCS0] = { &decode_info_mi, NULL, NULL, @@ -578,7 +621,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [BCS] = { + [BCS0] = { &decode_info_mi, NULL, &decode_info_2d, @@ -589,7 +632,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VECS] = { + [VECS0] = { &decode_info_mi, NULL, NULL, @@ -600,7 +643,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VCS2] = { + [VCS1] = { &decode_info_mi, NULL, NULL, @@ -612,40 +655,42 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { }, }; -static inline u32 get_opcode(u32 cmd, int ring_id) +static inline u32 get_opcode(u32 cmd, const struct intel_engine_cs *engine) { - struct decode_info *d_info; + const struct decode_info *d_info; - d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; + d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)]; if (d_info == NULL) return INVALID_OP; return cmd >> (32 - d_info->op_len); } -static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, - unsigned int opcode, int ring_id) +static inline const struct cmd_info * +find_cmd_entry(struct intel_gvt *gvt, unsigned int opcode, + const struct intel_engine_cs *engine) { struct cmd_entry *e; hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) { - if ((opcode == e->info->opcode) && - (e->info->rings & (1 << ring_id))) + if (opcode == e->info->opcode && + e->info->rings & engine->mask) return e->info; } return NULL; } -static inline struct cmd_info *get_cmd_info(struct intel_gvt *gvt, - u32 cmd, int ring_id) +static inline const struct cmd_info * +get_cmd_info(struct intel_gvt *gvt, u32 cmd, + const struct intel_engine_cs *engine) { u32 opcode; - opcode = get_opcode(cmd, ring_id); + opcode = get_opcode(cmd, engine); if (opcode == INVALID_OP) return NULL; - return find_cmd_entry(gvt, opcode, ring_id); + return find_cmd_entry(gvt, opcode, engine); } static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low) @@ -653,12 +698,12 @@ static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low) return (cmd >> low) & ((1U << (hi - low + 1)) - 1); } -static inline void print_opcode(u32 cmd, int ring_id) +static inline void print_opcode(u32 cmd, const struct intel_engine_cs *engine) { - struct decode_info *d_info; + const struct decode_info *d_info; int i; - d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; + d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)]; if (d_info == NULL) return; @@ -682,19 +727,26 @@ static inline u32 cmd_val(struct parser_exec_state *s, int index) return *cmd_ptr(s, index); } +static inline bool is_init_ctx(struct parser_exec_state *s) +{ + return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx); +} + static void parser_exec_state_dump(struct parser_exec_state *s) { int cnt = 0; int i; - gvt_dbg_cmd(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" - " ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id, - s->ring_id, s->ring_start, s->ring_start + s->ring_size, - s->ring_head, s->ring_tail); + gvt_dbg_cmd(" vgpu%d RING%s: ring_start(%08lx) ring_end(%08lx)" + " ring_head(%08lx) ring_tail(%08lx)\n", + s->vgpu->id, s->engine->name, + s->ring_start, s->ring_start + s->ring_size, + s->ring_head, s->ring_tail); gvt_dbg_cmd(" %s %s ip_gma(%08lx) ", s->buf_type == RING_BUFFER_INSTRUCTION ? - "RING_BUFFER" : "BATCH_BUFFER", + "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ? + "CTX_BUFFER" : "BATCH_BUFFER"), s->buf_addr_type == GTT_BUFFER ? "GTT" : "PPGTT", s->ip_gma); @@ -707,20 +759,15 @@ static void parser_exec_state_dump(struct parser_exec_state *s) s->ip_va, cmd_val(s, 0), cmd_val(s, 1), cmd_val(s, 2), cmd_val(s, 3)); - print_opcode(cmd_val(s, 0), s->ring_id); - - /* print the whole page to trace */ - pr_err(" ip_va=%p: %08x %08x %08x %08x\n", - s->ip_va, cmd_val(s, 0), cmd_val(s, 1), - cmd_val(s, 2), cmd_val(s, 3)); + print_opcode(cmd_val(s, 0), s->engine); s->ip_va = (u32 *)((((u64)s->ip_va) >> 12) << 12); while (cnt < 1024) { - pr_err("ip_va=%p: ", s->ip_va); + gvt_dbg_cmd("ip_va=%p: ", s->ip_va); for (i = 0; i < 8; i++) - pr_err("%08x ", cmd_val(s, i)); - pr_err("\n"); + gvt_dbg_cmd("%08x ", cmd_val(s, i)); + gvt_dbg_cmd("\n"); s->ip_va += 8 * sizeof(u32); cnt += 8; @@ -734,7 +781,8 @@ static inline void update_ip_va(struct parser_exec_state *s) if (WARN_ON(s->ring_head == s->ring_tail)) return; - if (s->buf_type == RING_BUFFER_INSTRUCTION) { + if (s->buf_type == RING_BUFFER_INSTRUCTION || + s->buf_type == RING_BUFFER_CTX) { unsigned long ring_top = s->ring_start + s->ring_size; if (s->ring_head > s->ring_tail) { @@ -779,7 +827,7 @@ static inline int ip_gma_advance(struct parser_exec_state *s, return 0; } -static inline int get_cmd_length(struct cmd_info *info, u32 cmd) +static inline int get_cmd_length(const struct cmd_info *info, u32 cmd) { if ((info->flag & F_LEN_MASK) == F_LEN_CONST) return info->len; @@ -798,33 +846,48 @@ static inline int cmd_length(struct parser_exec_state *s) *addr = val; \ } while (0) -static bool is_shadowed_mmio(unsigned int offset) +static inline bool is_mocs_mmio(unsigned int offset) { - bool ret = false; - - if ((offset == 0x2168) || /*BB current head register UDW */ - (offset == 0x2140) || /*BB current header register */ - (offset == 0x211c) || /*second BB header register UDW */ - (offset == 0x2114)) { /*second BB header register UDW */ - ret = true; - } - return ret; + return ((offset >= 0xc800) && (offset <= 0xcff8)) || + ((offset >= 0xb020) && (offset <= 0xb0a0)); } -static inline bool is_force_nonpriv_mmio(unsigned int offset) +static int is_cmd_update_pdps(unsigned int offset, + struct parser_exec_state *s) { - return (offset >= 0x24d0 && offset < 0x2500); + u32 base = s->workload->engine->mmio_base; + return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0)); } -static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) +static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) { - struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data = cmd_val(s, index + 1); - - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { - gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", - offset, data); + struct intel_vgpu *vgpu = s->vgpu; + struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm; + struct intel_vgpu_mm *mm; + u64 pdps[GEN8_3LVL_PDPES]; + + if (shadow_mm->ppgtt_mm.root_entry_type == + GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + pdps[0] = (u64)cmd_val(s, 2) << 32; + pdps[0] |= cmd_val(s, 4); + + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); + if (!mm) { + gvt_vgpu_err("failed to get the 4-level shadow vm\n"); + return -EINVAL; + } + intel_vgpu_mm_get(mm); + list_add_tail(&mm->ppgtt_mm.link, + &s->workload->lri_shadow_mm); + *cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]); + *cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]); + } else { + /* Currently all guests use PML4 table and now can't + * have a guest with 3-level table but uses LRI for + * PPGTT update. So this is simply un-testable. */ + GEM_BUG_ON(1); + gvt_vgpu_err("invalid shared shadow vm type\n"); return -EINVAL; } return 0; @@ -835,26 +898,68 @@ static int cmd_reg_handler(struct parser_exec_state *s, { struct intel_vgpu *vgpu = s->vgpu; struct intel_gvt *gvt = vgpu->gvt; + u32 ctx_sr_ctl; + u32 *vreg, vreg_old; if (offset + 4 > gvt->device_info.mmio_size) { gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", cmd, offset); - return -EINVAL; + return -EFAULT; } - if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { + if (is_init_ctx(s)) { + struct intel_gvt_mmio_info *mmio_info; + + intel_gvt_mmio_set_cmd_accessible(gvt, offset); + mmio_info = intel_gvt_find_mmio_info(gvt, offset); + if (mmio_info && mmio_info->write) + intel_gvt_mmio_set_cmd_write_patch(gvt, offset); + return 0; + } + + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); - return 0; + return -EBADRQC; } - if (is_shadowed_mmio(offset)) { - gvt_vgpu_err("found access of shadowed MMIO %x\n", offset); + if (!strncmp(cmd, "srm", 3) || + !strncmp(cmd, "lrm", 3)) { + if (offset == i915_mmio_reg_offset(GEN8_L3SQCREG4) || + offset == 0x21f0 || + (IS_BROADWELL(gvt->gt->i915) && + offset == i915_mmio_reg_offset(INSTPM))) + return 0; + else { + gvt_vgpu_err("%s access to register (%x)\n", + cmd, offset); + return -EPERM; + } + } + + if (!strncmp(cmd, "lrr-src", 7) || + !strncmp(cmd, "lrr-dst", 7)) { + if (IS_BROADWELL(gvt->gt->i915) && offset == 0x215c) + return 0; + else { + gvt_vgpu_err("not allowed cmd %s reg (%x)\n", cmd, offset); + return -EPERM; + } + } + + if (!strncmp(cmd, "pipe_ctrl", 9)) { + /* TODO: add LRI POST logic here */ return 0; } - if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) + if (strncmp(cmd, "lri", 3)) + return -EPERM; + + /* below are all lri handlers */ + vreg = &vgpu_vreg(s->vgpu, offset); + + if (is_cmd_update_pdps(offset, s) && + cmd_pdp_mmio_update_handler(s, offset, index)) return -EINVAL; if (offset == i915_mmio_reg_offset(DERRMR) || @@ -863,8 +968,70 @@ static int cmd_reg_handler(struct parser_exec_state *s, patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE); } - /* TODO: Update the global mask if this MMIO is a masked-MMIO */ - intel_gvt_mmio_set_cmd_accessed(gvt, offset); + if (is_mocs_mmio(offset)) + *vreg = cmd_val(s, index + 1); + + vreg_old = *vreg; + + if (intel_gvt_mmio_is_cmd_write_patch(gvt, offset)) { + u32 cmdval_new, cmdval; + struct intel_gvt_mmio_info *mmio_info; + + cmdval = cmd_val(s, index + 1); + + mmio_info = intel_gvt_find_mmio_info(gvt, offset); + if (!mmio_info) { + cmdval_new = cmdval; + } else { + u64 ro_mask = mmio_info->ro_mask; + int ret; + + if (likely(!ro_mask)) + ret = mmio_info->write(s->vgpu, offset, + &cmdval, 4); + else { + gvt_vgpu_err("try to write RO reg %x\n", + offset); + ret = -EBADRQC; + } + if (ret) + return ret; + cmdval_new = *vreg; + } + if (cmdval_new != cmdval) + patch_value(s, cmd_ptr(s, index+1), cmdval_new); + } + + /* only patch cmd. restore vreg value if changed in mmio write handler*/ + *vreg = vreg_old; + + /* TODO + * In order to let workload with inhibit context to generate + * correct image data into memory, vregs values will be loaded to + * hw via LRIs in the workload with inhibit context. But as + * indirect context is loaded prior to LRIs in workload, we don't + * want reg values specified in indirect context overwritten by + * LRIs in workloads. So, when scanning an indirect context, we + * update reg values in it into vregs, so LRIs in workload with + * inhibit context will restore with correct values + */ + if (GRAPHICS_VER(s->engine->i915) == 9 && + intel_gvt_mmio_is_sr_in_ctx(gvt, offset) && + !strncmp(cmd, "lri", 3)) { + intel_gvt_read_gpa(s->vgpu, + s->workload->ring_context_gpa + 12, &ctx_sr_ctl, 4); + /* check inhibit context */ + if (ctx_sr_ctl & 1) { + u32 data = cmd_val(s, index + 1); + + if (intel_gvt_mmio_has_mode_mask(s->vgpu->gvt, offset)) + intel_vgpu_mask_mmio_write(vgpu, + offset, &data, 4); + else + vgpu_vreg(vgpu, offset) = data; + } + } + return 0; } @@ -884,21 +1051,20 @@ static int cmd_handler_lri(struct parser_exec_state *s) { int i, ret = 0; int cmd_len = cmd_length(s); - struct intel_gvt *gvt = s->vgpu->gvt; for (i = 1; i < cmd_len; i += 2) { - if (IS_BROADWELL(gvt->dev_priv) && - (s->ring_id != RCS)) { - if (s->ring_id == BCS && - cmd_reg(s, i) == - i915_mmio_reg_offset(DERRMR)) + if (IS_BROADWELL(s->engine->i915) && s->engine->id != RCS0) { + if (s->engine->id == BCS0 && + cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR)) ret |= 0; else - ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0; + ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0; } if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri"); + if (ret) + break; } return ret; } @@ -909,14 +1075,18 @@ static int cmd_handler_lrr(struct parser_exec_state *s) int cmd_len = cmd_length(s); for (i = 1; i < cmd_len; i += 2) { - if (IS_BROADWELL(s->vgpu->gvt->dev_priv)) + if (IS_BROADWELL(s->engine->i915)) ret |= ((cmd_reg_inhibit(s, i) || - (cmd_reg_inhibit(s, i + 1)))) ? - -EINVAL : 0; + (cmd_reg_inhibit(s, i + 1)))) ? + -EBADRQC : 0; if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src"); + if (ret) + break; ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst"); + if (ret) + break; } return ret; } @@ -933,16 +1103,20 @@ static int cmd_handler_lrm(struct parser_exec_state *s) int cmd_len = cmd_length(s); for (i = 1; i < cmd_len;) { - if (IS_BROADWELL(gvt->dev_priv)) - ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0; + if (IS_BROADWELL(s->engine->i915)) + ret |= (cmd_reg_inhibit(s, i)) ? -EBADRQC : 0; if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm"); + if (ret) + break; if (cmd_val(s, 0) & (1 << 22)) { gma = cmd_gma(s, i + 1); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, i + 2)) << 32; ret |= cmd_address_audit(s, gma, sizeof(u32), false); + if (ret) + break; } i += gmadr_dw_number(s) + 1; } @@ -958,11 +1132,15 @@ static int cmd_handler_srm(struct parser_exec_state *s) for (i = 1; i < cmd_len;) { ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "srm"); + if (ret) + break; if (cmd_val(s, 0) & (1 << 22)) { gma = cmd_gma(s, i + 1); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, i + 2)) << 32; ret |= cmd_address_audit(s, gma, sizeof(u32), false); + if (ret) + break; } i += gmadr_dw_number(s) + 1; } @@ -975,28 +1153,28 @@ struct cmd_interrupt_event { int mi_user_interrupt; }; -static struct cmd_interrupt_event cmd_interrupt_events[] = { - [RCS] = { +static const struct cmd_interrupt_event cmd_interrupt_events[] = { + [RCS0] = { .pipe_control_notify = RCS_PIPE_CONTROL, .mi_flush_dw = INTEL_GVT_EVENT_RESERVED, .mi_user_interrupt = RCS_MI_USER_INTERRUPT, }, - [BCS] = { + [BCS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = BCS_MI_FLUSH_DW, .mi_user_interrupt = BCS_MI_USER_INTERRUPT, }, - [VCS] = { + [VCS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VCS_MI_FLUSH_DW, .mi_user_interrupt = VCS_MI_USER_INTERRUPT, }, - [VCS2] = { + [VCS1] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VCS2_MI_FLUSH_DW, .mi_user_interrupt = VCS2_MI_USER_INTERRUPT, }, - [VECS] = { + [VECS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VECS_MI_FLUSH_DW, .mi_user_interrupt = VECS_MI_USER_INTERRUPT, @@ -1010,6 +1188,7 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s) bool index_mode = false; unsigned int post_sync; int ret = 0; + u32 hws_pga, val; post_sync = (cmd_val(s, 1) & PIPE_CONTROL_POST_SYNC_OP_MASK) >> 14; @@ -1033,6 +1212,15 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s) index_mode = true; ret |= cmd_address_audit(s, gma, sizeof(u64), index_mode); + if (ret) + return ret; + if (index_mode) { + hws_pga = s->vgpu->hws_pga[s->engine->id]; + gma = hws_pga + gma; + patch_value(s, cmd_ptr(s, 2), gma); + val = cmd_val(s, 1) & (~(1 << 21)); + patch_value(s, cmd_ptr(s, 1), val); + } } } } @@ -1041,15 +1229,16 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s) return ret; if (cmd_val(s, 1) & PIPE_CONTROL_NOTIFY) - set_bit(cmd_interrupt_events[s->ring_id].pipe_control_notify, - s->workload->pending_events); + set_bit(cmd_interrupt_events[s->engine->id].pipe_control_notify, + s->workload->pending_events); return 0; } static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s) { - set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt, - s->workload->pending_events); + set_bit(cmd_interrupt_events[s->engine->id].mi_user_interrupt, + s->workload->pending_events); + patch_value(s, cmd_ptr(s, 0), MI_NOOP); return 0; } @@ -1066,6 +1255,8 @@ static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s) s->buf_type = BATCH_BUFFER_INSTRUCTION; ret = ip_gma_set(s, s->ret_ip_gma_bb); s->buf_addr_type = s->saved_buf_addr_type; + } else if (s->buf_type == RING_BUFFER_CTX) { + ret = ip_gma_set(s, s->ring_tail); } else { s->buf_type = RING_BUFFER_INSTRUCTION; s->buf_addr_type = GTT_BUFFER; @@ -1098,7 +1289,8 @@ struct plane_code_mapping { static int gen8_decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + struct drm_i915_private *dev_priv = s->engine->i915; + struct intel_display *display = dev_priv->display; struct plane_code_mapping gen8_plane_code[] = { [0] = {PIPE_A, PLANE_A, PRIMARY_A_FLIP_DONE}, [1] = {PIPE_B, PLANE_A, PRIMARY_B_FLIP_DONE}, @@ -1115,8 +1307,8 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, dword2 = cmd_val(s, 2); v = (dword0 & GENMASK(21, 19)) >> 19; - if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) - return -EINVAL; + if (drm_WARN_ON(&dev_priv->drm, v >= ARRAY_SIZE(gen8_plane_code))) + return -EBADRQC; info->pipe = gen8_plane_code[v].pipe; info->plane = gen8_plane_code[v].plane; @@ -1127,16 +1319,16 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1); if (info->plane == PLANE_A) { - info->ctrl_reg = DSPCNTR(info->pipe); - info->stride_reg = DSPSTRIDE(info->pipe); - info->surf_reg = DSPSURF(info->pipe); + info->ctrl_reg = DSPCNTR(display, info->pipe); + info->stride_reg = DSPSTRIDE(display, info->pipe); + info->surf_reg = DSPSURF(display, info->pipe); } else if (info->plane == PLANE_B) { info->ctrl_reg = SPRCTL(info->pipe); info->stride_reg = SPRSTRIDE(info->pipe); info->surf_reg = SPRSURF(info->pipe); } else { - WARN_ON(1); - return -EINVAL; + drm_WARN_ON(&dev_priv->drm, 1); + return -EBADRQC; } return 0; } @@ -1144,7 +1336,8 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, static int skl_decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + struct drm_i915_private *dev_priv = s->engine->i915; + struct intel_display *display = dev_priv->display; struct intel_vgpu *vgpu = s->vgpu; u32 dword0 = cmd_val(s, 0); u32 dword1 = cmd_val(s, 1); @@ -1185,7 +1378,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, default: gvt_vgpu_err("unknown plane code %d\n", plane); - return -EINVAL; + return -EBADRQC; } info->stride_val = (dword1 & GENMASK(15, 6)) >> 6; @@ -1193,9 +1386,9 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, info->surf_val = (dword2 & GENMASK(31, 12)) >> 12; info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1); - info->ctrl_reg = DSPCNTR(info->pipe); - info->stride_reg = DSPSTRIDE(info->pipe); - info->surf_reg = DSPSURF(info->pipe); + info->ctrl_reg = DSPCNTR(display, info->pipe); + info->stride_reg = DSPSTRIDE(display, info->pipe); + info->surf_reg = DSPSURF(display, info->pipe); return 0; } @@ -1203,20 +1396,19 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, static int gen8_check_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; u32 stride, tile; if (!info->async_flip) return 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & + if (GRAPHICS_VER(s->engine->i915) >= 9) { + stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; } else { - stride = (vgpu_vreg(s->vgpu, info->stride_reg) & + stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(15, 6)) >> 6; - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; } if (stride != info->stride_val) @@ -1232,36 +1424,41 @@ static int gen8_update_plane_mmio_from_mi_display_flip( struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + struct drm_i915_private *dev_priv = s->engine->i915; + struct intel_display *display = dev_priv->display; struct intel_vgpu *vgpu = s->vgpu; - set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), + set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), + if (GRAPHICS_VER(dev_priv) >= 9) { + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10), info->tile_val << 10); } else { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6), info->stride_val << 6); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10), info->tile_val << 10); } - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; - intel_vgpu_trigger_virtual_event(vgpu, info->event); + if (info->plane == PLANE_PRIMARY) + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(display, info->pipe))++; + + if (info->async_flip) + intel_vgpu_trigger_virtual_event(vgpu, info->event); + else + set_bit(info->event, vgpu->irq.flip_done_event[info->pipe]); + return 0; } static int decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(s->engine->i915)) return gen8_decode_mi_display_flip(s, info); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (GRAPHICS_VER(s->engine->i915) >= 9) return skl_decode_mi_display_flip(s, info); return -ENODEV; @@ -1270,26 +1467,14 @@ static int decode_mi_display_flip(struct parser_exec_state *s, static int check_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - - if (IS_BROADWELL(dev_priv) - || IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv)) - return gen8_check_mi_display_flip(s, info); - return -ENODEV; + return gen8_check_mi_display_flip(s, info); } static int update_plane_mmio_from_mi_display_flip( struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - - if (IS_BROADWELL(dev_priv) - || IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv)) - return gen8_update_plane_mmio_from_mi_display_flip(s, info); - return -ENODEV; + return gen8_update_plane_mmio_from_mi_display_flip(s, info); } static int cmd_handler_mi_display_flip(struct parser_exec_state *s) @@ -1299,6 +1484,15 @@ static int cmd_handler_mi_display_flip(struct parser_exec_state *s) int ret; int i; int len = cmd_length(s); + u32 valid_len = CMD_LEN(1); + + /* Flip Type == Stereo 3D Flip */ + if (DWORD_FIELD(2, 1, 0) == 2) + valid_len++; + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) + return ret; ret = decode_mi_display_flip(s, &info); if (ret) { @@ -1348,10 +1542,13 @@ static unsigned long get_gma_bb_from_cmd(struct parser_exec_state *s, int index) { unsigned long addr; unsigned long gma_high, gma_low; - int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd; + struct intel_vgpu *vgpu = s->vgpu; + int gmadr_bytes = vgpu->gvt->device_info.gmadr_bytes_in_cmd; - if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) + if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) { + gvt_vgpu_err("invalid gma bytes %d\n", gmadr_bytes); return INTEL_GVT_INVALID_ADDR; + } gma_low = cmd_val(s, index) & BATCH_BUFFER_ADDR_MASK; if (gmadr_bytes == 4) { @@ -1374,21 +1571,21 @@ static inline int cmd_address_audit(struct parser_exec_state *s, if (op_size > max_surface_size) { gvt_vgpu_err("command address audit fail name %s\n", s->info->name); - return -EINVAL; + return -EFAULT; } if (index_mode) { - if (guest_gma >= GTT_PAGE_SIZE / sizeof(u64)) { - ret = -EINVAL; + if (guest_gma >= I915_GTT_PAGE_SIZE) { + ret = -EFAULT; goto err; } - } else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) || - (!vgpu_gmadr_is_valid(s->vgpu, - guest_gma + op_size - 1))) { - ret = -EINVAL; + } else if (!intel_gvt_ggtt_validate_range(vgpu, guest_gma, op_size)) { + ret = -EFAULT; goto err; } + return 0; + err: gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n", s->info->name, guest_gma, op_size); @@ -1415,12 +1612,21 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s) int op_size = (cmd_length(s) - 3) * sizeof(u32); int core_id = (cmd_val(s, 2) & (1 << 0)) ? 1 : 0; unsigned long gma, gma_low, gma_high; + u32 valid_len = CMD_LEN(2); int ret = 0; /* check ppggt */ if (!(cmd_val(s, 0) & (1 << 22))) return 0; + /* check if QWORD */ + if (DWORD_FIELD(0, 21, 21)) + valid_len++; + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) + return ret; + gma = cmd_val(s, 2) & GENMASK(31, 2); if (gmadr_bytes == 8) { @@ -1439,7 +1645,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s) gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name); - return -EINVAL; + return -EBADRQC; } static int cmd_handler_mi_semaphore_wait(struct parser_exec_state *s) @@ -1463,11 +1669,20 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s) int op_size = (1 << ((cmd_val(s, 0) & GENMASK(20, 19)) >> 19)) * sizeof(u32); unsigned long gma, gma_high; + u32 valid_len = CMD_LEN(1); int ret = 0; if (!(cmd_val(s, 0) & (1 << 22))) return ret; + /* check inline data */ + if (cmd_val(s, 0) & BIT(18)) + valid_len = CMD_LEN(9); + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) + return ret; + gma = cmd_val(s, 1) & GENMASK(31, 2); if (gmadr_bytes == 8) { gma_high = cmd_val(s, 2) & GENMASK(15, 0); @@ -1504,6 +1719,17 @@ static int cmd_handler_mi_flush_dw(struct parser_exec_state *s) unsigned long gma; bool index_mode = false; int ret = 0; + u32 hws_pga, val; + u32 valid_len = CMD_LEN(2); + + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) { + /* Check again for Qword */ + ret = gvt_check_valid_cmd_length(cmd_length(s), + ++valid_len); + return ret; + } /* Check post-sync and ppgtt bit */ if (((cmd_val(s, 0) >> 14) & 0x3) && (cmd_val(s, 1) & (1 << 2))) { @@ -1514,11 +1740,20 @@ static int cmd_handler_mi_flush_dw(struct parser_exec_state *s) if (cmd_val(s, 0) & (1 << 21)) index_mode = true; ret = cmd_address_audit(s, gma, sizeof(u64), index_mode); + if (ret) + return ret; + if (index_mode) { + hws_pga = s->vgpu->hws_pga[s->engine->id]; + gma = hws_pga + gma; + patch_value(s, cmd_ptr(s, 1), gma); + val = cmd_val(s, 0) & (~(1 << 21)); + patch_value(s, cmd_ptr(s, 0), val); + } } /* Check notify bit */ if ((cmd_val(s, 0) & (1 << 8))) - set_bit(cmd_interrupt_events[s->ring_id].mi_flush_dw, - s->workload->pending_events); + set_bit(cmd_interrupt_events[s->engine->id].mi_flush_dw, + s->workload->pending_events); return ret; } @@ -1545,12 +1780,12 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, return -EFAULT; } - offset = gma & (GTT_PAGE_SIZE - 1); + offset = gma & (I915_GTT_PAGE_SIZE - 1); - copy_len = (end_gma - gma) >= (GTT_PAGE_SIZE - offset) ? - GTT_PAGE_SIZE - offset : end_gma - gma; + copy_len = (end_gma - gma) >= (I915_GTT_PAGE_SIZE - offset) ? + I915_GTT_PAGE_SIZE - offset : end_gma - gma; - intel_gvt_hypervisor_read_gpa(vgpu, gpa, va + len, copy_len); + intel_gvt_read_gpa(vgpu, gpa, va + len, copy_len); len += copy_len; gma += copy_len; @@ -1565,138 +1800,201 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, */ static int batch_buffer_needs_scan(struct parser_exec_state *s) { - struct intel_gvt *gvt = s->vgpu->gvt; + /* Decide privilege based on address space */ + if (cmd_val(s, 0) & BIT(8) && + !(s->vgpu->scan_nonprivbb & s->engine->mask)) + return 0; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv)) { - /* BDW decides privilege based on address space */ - if (cmd_val(s, 0) & (1 << 8)) - return 0; - } return 1; } -static uint32_t find_bb_size(struct parser_exec_state *s) +static const char *repr_addr_type(unsigned int type) +{ + return type == PPGTT_BUFFER ? "ppgtt" : "ggtt"; +} + +static int find_bb_size(struct parser_exec_state *s, + unsigned long *bb_size, + unsigned long *bb_end_cmd_offset) { unsigned long gma = 0; - struct cmd_info *info; - uint32_t bb_size = 0; - uint32_t cmd_len = 0; - bool met_bb_end = false; + const struct cmd_info *info; + u32 cmd_len = 0; + bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; + + *bb_size = 0; + *bb_end_cmd_offset = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); - cmd = cmd_val(s, 0); + if (gma == INTEL_GVT_INVALID_ADDR) + return -EFAULT; - info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); + cmd = cmd_val(s, 0); + info = get_cmd_info(s->vgpu->gvt, cmd, s->engine); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n", + cmd, get_opcode(cmd, s->engine), + repr_addr_type(s->buf_addr_type), + s->engine->name, s->workload); + return -EBADRQC; } do { - copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, - gma, gma + 4, &cmd); - info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); + if (copy_gma_to_hva(s->vgpu, mm, + gma, gma + 4, &cmd) < 0) + return -EFAULT; + info = get_cmd_info(s->vgpu->gvt, cmd, s->engine); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n", + cmd, get_opcode(cmd, s->engine), + repr_addr_type(s->buf_addr_type), + s->engine->name, s->workload); + return -EBADRQC; } if (info->opcode == OP_MI_BATCH_BUFFER_END) { - met_bb_end = true; + bb_end = true; } else if (info->opcode == OP_MI_BATCH_BUFFER_START) { - if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) { + if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) /* chained batch buffer */ - met_bb_end = true; - } + bb_end = true; } + + if (bb_end) + *bb_end_cmd_offset = *bb_size; + cmd_len = get_cmd_length(info, cmd) << 2; - bb_size += cmd_len; + *bb_size += cmd_len; gma += cmd_len; + } while (!bb_end); + + return 0; +} + +static int audit_bb_end(struct parser_exec_state *s, void *va) +{ + struct intel_vgpu *vgpu = s->vgpu; + u32 cmd = *(u32 *)va; + const struct cmd_info *info; + + info = get_cmd_info(s->vgpu->gvt, cmd, s->engine); + if (info == NULL) { + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n", + cmd, get_opcode(cmd, s->engine), + repr_addr_type(s->buf_addr_type), + s->engine->name, s->workload); + return -EBADRQC; + } - } while (!met_bb_end); + if ((info->opcode == OP_MI_BATCH_BUFFER_END) || + ((info->opcode == OP_MI_BATCH_BUFFER_START) && + (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0))) + return 0; - return bb_size; + return -EBADRQC; } static int perform_bb_shadow(struct parser_exec_state *s) { - struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; + struct intel_vgpu_shadow_bb *bb; unsigned long gma = 0; - uint32_t bb_size; - void *dst = NULL; + unsigned long bb_size; + unsigned long bb_end_cmd_offset; int ret = 0; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; + unsigned long start_offset = 0; - /* get the start gm address of the batch buffer */ + /* Get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); + if (gma == INTEL_GVT_INVALID_ADDR) + return -EFAULT; - /* get the size of the batch buffer */ - bb_size = find_bb_size(s); + ret = find_bb_size(s, &bb_size, &bb_end_cmd_offset); + if (ret) + return ret; - /* allocate shadow batch buffer */ - entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL); - if (entry_obj == NULL) + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) return -ENOMEM; - entry_obj->obj = - i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); - if (IS_ERR(entry_obj->obj)) { - ret = PTR_ERR(entry_obj->obj); - goto free_entry; - } - entry_obj->len = bb_size; - INIT_LIST_HEAD(&entry_obj->list); + bb->ppgtt = s->buf_addr_type != GTT_BUFFER; - dst = i915_gem_object_pin_map(entry_obj->obj, I915_MAP_WB); - if (IS_ERR(dst)) { - ret = PTR_ERR(dst); - goto put_obj; + /* + * The start_offset stores the batch buffer's start gma's + * offset relative to page boundary. So for non-privileged batch + * buffer, the shadowed gem object holds exactly the same page + * layout as original gem object. This is for the convenience of + * replacing the whole non-privilged batch buffer page to this + * shadowed one in PPGTT at the same gma address. (This replacing + * action is not implemented yet now, but may be necessary in + * future). + * For prileged batch buffer, we just change start gma address to + * that of shadowed page. + */ + if (bb->ppgtt) + start_offset = gma & ~I915_GTT_PAGE_MASK; + + bb->obj = i915_gem_object_create_shmem(s->engine->i915, + round_up(bb_size + start_offset, + PAGE_SIZE)); + if (IS_ERR(bb->obj)) { + ret = PTR_ERR(bb->obj); + goto err_free_bb; } - ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false); - if (ret) { - gvt_vgpu_err("failed to set shadow batch to CPU\n"); - goto unmap_src; + bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB); + if (IS_ERR(bb->va)) { + ret = PTR_ERR(bb->va); + goto err_free_obj; } - entry_obj->va = dst; - entry_obj->bb_start_cmd_va = s->ip_va; - - /* copy batch buffer to shadow batch buffer*/ - ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(s->vgpu, mm, gma, gma + bb_size, - dst); + bb->va + start_offset); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); - goto unmap_src; + ret = -EFAULT; + goto err_unmap; } - list_add(&entry_obj->list, &s->workload->shadow_bb); + ret = audit_bb_end(s, bb->va + start_offset + bb_end_cmd_offset); + if (ret) + goto err_unmap; + + i915_gem_object_unlock(bb->obj); + INIT_LIST_HEAD(&bb->list); + list_add(&bb->list, &s->workload->shadow_bb); + + bb->bb_start_cmd_va = s->ip_va; + + if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa)) + bb->bb_offset = s->ip_va - s->rb_va; + else + bb->bb_offset = 0; + /* * ip_va saves the virtual address of the shadow batch buffer, while * ip_gma saves the graphics address of the original batch buffer. - * As the shadow batch buffer is just a copy from the originial one, + * As the shadow batch buffer is just a copy from the original one, * it should be right to use shadow batch buffer'va and original batch * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). */ - s->ip_va = dst; + s->ip_va = bb->va + start_offset; s->ip_gma = gma; - return 0; - -unmap_src: - i915_gem_object_unpin_map(entry_obj->obj); -put_obj: - i915_gem_object_put(entry_obj->obj); -free_entry: - kfree(entry_obj); +err_unmap: + i915_gem_object_unpin_map(bb->obj); +err_free_obj: + i915_gem_object_put(bb->obj); +err_free_bb: + kfree(bb); return ret; } @@ -1708,13 +2006,13 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) { gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); - return -EINVAL; + return -EFAULT; } second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1; if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) { gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n"); - return -EINVAL; + return -EFAULT; } s->saved_buf_addr_type = s->buf_addr_type; @@ -1738,11 +2036,12 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (ret < 0) return ret; } - return ret; } -static struct cmd_info cmd_info[] = { +static int mi_noop_index; + +static const struct cmd_info cmd_info[] = { {"MI_NOOP", OP_MI_NOOP, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL}, {"MI_SET_PREDICATE", OP_MI_SET_PREDICATE, F_LEN_CONST, R_ALL, D_ALL, @@ -1790,21 +2089,24 @@ static struct cmd_info cmd_info[] = { {"MI_RS_CONTEXT", OP_MI_RS_CONTEXT, F_LEN_CONST, R_RCS, D_ALL, 0, 1, NULL}, - {"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR | F_POST_HANDLE, + {"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR, R_RCS | R_BCS, D_ALL, 0, 8, cmd_handler_mi_display_flip}, - {"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR, R_ALL, D_ALL, - 0, 8, NULL}, + {"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR | F_LEN_VAR_FIXED, + R_ALL, D_ALL, 0, 8, NULL, CMD_LEN(1)}, {"MI_MATH", OP_MI_MATH, F_LEN_VAR, R_ALL, D_ALL, 0, 8, NULL}, - {"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL}, + {"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, + D_ALL, 0, 8, NULL, CMD_LEN(0)}, - {"ME_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, F_LEN_VAR, R_ALL, - D_BDW_PLUS, 0, 8, NULL}, + {"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, 0, 8, + NULL, CMD_LEN(0)}, - {"ME_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, F_LEN_VAR, R_ALL, D_BDW_PLUS, - ADDR_FIX_1(2), 8, cmd_handler_mi_semaphore_wait}, + {"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, ADDR_FIX_1(2), + 8, cmd_handler_mi_semaphore_wait, CMD_LEN(2)}, {"MI_STORE_DATA_IMM", OP_MI_STORE_DATA_IMM, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1), 10, cmd_handler_mi_store_data_imm}, @@ -1818,8 +2120,9 @@ static struct cmd_info cmd_info[] = { {"MI_UPDATE_GTT", OP_MI_UPDATE_GTT, F_LEN_VAR, R_ALL, D_BDW_PLUS, 0, 10, cmd_handler_mi_update_gtt}, - {"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM, F_LEN_VAR, R_ALL, - D_ALL, ADDR_FIX_1(2), 8, cmd_handler_srm}, + {"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8, + cmd_handler_srm, CMD_LEN(2)}, {"MI_FLUSH_DW", OP_MI_FLUSH_DW, F_LEN_VAR, R_ALL, D_ALL, 0, 6, cmd_handler_mi_flush_dw}, @@ -1827,26 +2130,30 @@ static struct cmd_info cmd_info[] = { {"MI_CLFLUSH", OP_MI_CLFLUSH, F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(1), 10, cmd_handler_mi_clflush}, - {"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT, F_LEN_VAR, R_ALL, - D_ALL, ADDR_FIX_1(1), 6, cmd_handler_mi_report_perf_count}, + {"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(1), 6, + cmd_handler_mi_report_perf_count, CMD_LEN(2)}, - {"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM, F_LEN_VAR, R_ALL, - D_ALL, ADDR_FIX_1(2), 8, cmd_handler_lrm}, + {"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8, + cmd_handler_lrm, CMD_LEN(2)}, - {"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG, F_LEN_VAR, R_ALL, - D_ALL, 0, 8, cmd_handler_lrr}, + {"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, 0, 8, + cmd_handler_lrr, CMD_LEN(1)}, - {"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM, F_LEN_VAR, R_RCS, - D_ALL, 0, 8, NULL}, + {"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM, + F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, D_ALL, 0, + 8, NULL, CMD_LEN(2)}, - {"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR, R_RCS, D_ALL, - ADDR_FIX_1(2), 8, NULL}, + {"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR | F_LEN_VAR_FIXED, + R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL, CMD_LEN(2)}, {"MI_STORE_URM_MEM", OP_MI_STORE_URM_MEM, F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL}, - {"MI_OP_2E", OP_MI_2E, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_2(1, 2), - 8, cmd_handler_mi_op_2e}, + {"MI_OP_2E", OP_MI_2E, F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, + ADDR_FIX_2(1, 2), 8, cmd_handler_mi_op_2e, CMD_LEN(3)}, {"MI_OP_2F", OP_MI_2F, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1), 8, cmd_handler_mi_op_2f}, @@ -1856,8 +2163,8 @@ static struct cmd_info cmd_info[] = { cmd_handler_mi_batch_buffer_start}, {"MI_CONDITIONAL_BATCH_BUFFER_END", OP_MI_CONDITIONAL_BATCH_BUFFER_END, - F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(2), 8, - cmd_handler_mi_conditional_batch_buffer_end}, + F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8, + cmd_handler_mi_conditional_batch_buffer_end, CMD_LEN(2)}, {"MI_LOAD_SCAN_LINES_INCL", OP_MI_LOAD_SCAN_LINES_INCL, F_LEN_CONST, R_RCS | R_BCS, D_ALL, 0, 2, NULL}, @@ -2245,6 +2552,9 @@ static struct cmd_info cmd_info[] = { {"OP_3D_MEDIA_0_1_4", OP_3D_MEDIA_0_1_4, F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_1(1), 8, NULL}, + {"OP_SWTESS_BASE_ADDRESS", OP_SWTESS_BASE_ADDRESS, + F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_2(1, 2), 3, NULL}, + {"3DSTATE_VS", OP_3DSTATE_VS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL}, {"3DSTATE_SF", OP_3DSTATE_SF, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL}, @@ -2264,6 +2574,9 @@ static struct cmd_info cmd_info[] = { {"MEDIA_STATE_FLUSH", OP_MEDIA_STATE_FLUSH, F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL}, + {"MEDIA_POOL_STATE", OP_MEDIA_POOL_STATE, F_LEN_VAR, R_RCS, D_ALL, + 0, 16, NULL}, + {"MEDIA_OBJECT", OP_MEDIA_OBJECT, F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL}, {"MEDIA_CURBE_LOAD", OP_MEDIA_CURBE_LOAD, F_LEN_VAR, R_RCS, D_ALL, @@ -2406,7 +2719,7 @@ static struct cmd_info cmd_info[] = { 0, 12, NULL}, {"VEB_DI_IECP", OP_VEB_DNDI_IECP_STATE, F_LEN_VAR, R_VECS, D_BDW_PLUS, - 0, 20, NULL}, + 0, 12, NULL}, }; static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e) @@ -2418,23 +2731,38 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e) static int cmd_parser_exec(struct parser_exec_state *s) { struct intel_vgpu *vgpu = s->vgpu; - struct cmd_info *info; + const struct cmd_info *info; u32 cmd; int ret = 0; cmd = cmd_val(s, 0); - info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); + /* fastpath for MI_NOOP */ + if (cmd == MI_NOOP) + info = &cmd_info[mi_noop_index]; + else + info = get_cmd_info(s->vgpu->gvt, cmd, s->engine); + if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n", + cmd, get_opcode(cmd, s->engine), + repr_addr_type(s->buf_addr_type), + s->engine->name, s->workload); + return -EBADRQC; } s->info = info; - trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, - cmd_length(s), s->buf_type); + trace_gvt_command(vgpu->id, s->engine->id, s->ip_gma, s->ip_va, + cmd_length(s), s->buf_type, s->buf_addr_type, + s->workload, info->name); + + if ((info->flag & F_LEN_MASK) == F_LEN_VAR_FIXED) { + ret = gvt_check_valid_cmd_length(cmd_length(s), + info->valid_len); + if (ret) + return ret; + } if (info->handler) { ret = info->handler(s); @@ -2463,6 +2791,10 @@ static inline bool gma_out_of_range(unsigned long gma, return (gma > gma_tail) && (gma < gma_head); } +/* Keep the consistent return type, e.g EBADRQC for unknown + * cmd, EFAULT for invalid address, EPERM for nonpriv. later + * works as the input of VM healthy status. + */ static int command_scan(struct parser_exec_state *s, unsigned long rb_head, unsigned long rb_tail, unsigned long rb_start, unsigned long rb_len) @@ -2477,7 +2809,8 @@ static int command_scan(struct parser_exec_state *s, gma_bottom = rb_start + rb_len; while (s->ip_gma != gma_tail) { - if (s->buf_type == RING_BUFFER_INSTRUCTION) { + if (s->buf_type == RING_BUFFER_INSTRUCTION || + s->buf_type == RING_BUFFER_CTX) { if (!(s->ip_gma >= rb_start) || !(s->ip_gma < gma_bottom)) { gvt_vgpu_err("ip_gma %lx out of ring scope." @@ -2485,7 +2818,7 @@ static int command_scan(struct parser_exec_state *s, s->ip_gma, rb_start, gma_bottom); parser_exec_state_dump(s); - return -EINVAL; + return -EFAULT; } if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) { gvt_vgpu_err("ip_gma %lx out of range." @@ -2509,38 +2842,32 @@ static int command_scan(struct parser_exec_state *s, static int scan_workload(struct intel_vgpu_workload *workload) { - unsigned long gma_head, gma_tail, gma_bottom; + unsigned long gma_head, gma_tail; struct parser_exec_state s; int ret = 0; /* ring base is page aligned */ - if (WARN_ON(!IS_ALIGNED(workload->rb_start, GTT_PAGE_SIZE))) + if (WARN_ON(!IS_ALIGNED(workload->rb_start, I915_GTT_PAGE_SIZE))) return -EINVAL; gma_head = workload->rb_start + workload->rb_head; gma_tail = workload->rb_start + workload->rb_tail; - gma_bottom = workload->rb_start + _RING_CTL_BUF_SIZE(workload->rb_ctl); s.buf_type = RING_BUFFER_INSTRUCTION; s.buf_addr_type = GTT_BUFFER; s.vgpu = workload->vgpu; - s.ring_id = workload->ring_id; + s.engine = workload->engine; s.ring_start = workload->rb_start; s.ring_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); s.ring_head = gma_head; s.ring_tail = gma_tail; s.rb_va = workload->shadow_ring_buffer_va; s.workload = workload; + s.is_ctx_wa = false; - if ((bypass_scan_mask & (1 << workload->ring_id)) || - gma_head == gma_tail) + if (bypass_scan_mask & workload->engine->mask || gma_head == gma_tail) return 0; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; @@ -2555,7 +2882,7 @@ out: static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail; + unsigned long gma_head, gma_tail, ring_size, ring_tail; struct parser_exec_state s; int ret = 0; struct intel_vgpu_workload *workload = container_of(wa_ctx, @@ -2563,31 +2890,27 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx); /* ring base is page aligned */ - if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE))) + if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, + I915_GTT_PAGE_SIZE))) return -EINVAL; - ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t); + ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(u32); ring_size = round_up(wa_ctx->indirect_ctx.size + CACHELINE_BYTES, PAGE_SIZE); gma_head = wa_ctx->indirect_ctx.guest_gma; gma_tail = wa_ctx->indirect_ctx.guest_gma + ring_tail; - gma_bottom = wa_ctx->indirect_ctx.guest_gma + ring_size; s.buf_type = RING_BUFFER_INSTRUCTION; s.buf_addr_type = GTT_BUFFER; s.vgpu = workload->vgpu; - s.ring_id = workload->ring_id; + s.engine = workload->engine; s.ring_start = wa_ctx->indirect_ctx.guest_gma; s.ring_size = ring_size; s.ring_head = gma_head; s.ring_tail = gma_tail; s.rb_va = wa_ctx->indirect_ctx.shadow_va; s.workload = workload; - - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } + s.is_ctx_wa = true; ret = ip_gma_set(&s, gma_head); if (ret) @@ -2602,8 +2925,9 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - u32 *cs; + void *shadow_ring_buffer_va; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2616,38 +2940,48 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_tail = workload->rb_start + workload->rb_tail; gma_top = workload->rb_start + guest_rb_size; - /* allocate shadow ring buffer */ - cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); - if (IS_ERR(cs)) - return PTR_ERR(cs); + if (workload->rb_len > s->ring_scan_buffer_size[workload->engine->id]) { + void *p; + + /* realloc the new ring buffer if needed */ + p = krealloc(s->ring_scan_buffer[workload->engine->id], + workload->rb_len, GFP_KERNEL); + if (!p) { + gvt_vgpu_err("fail to re-alloc ring scan buffer\n"); + return -ENOMEM; + } + s->ring_scan_buffer[workload->engine->id] = p; + s->ring_scan_buffer_size[workload->engine->id] = workload->rb_len; + } + + shadow_ring_buffer_va = s->ring_scan_buffer[workload->engine->id]; /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = cs; + workload->shadow_ring_buffer_va = shadow_ring_buffer_va; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, cs); + gma_head, gma_top, shadow_ring_buffer_va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - cs += ret / sizeof(u32); + shadow_ring_buffer_va += ret; gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, + shadow_ring_buffer_va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - cs += ret / sizeof(u32); - intel_ring_advance(workload->req, cs); return 0; } -int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) +int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload) { int ret; struct intel_vgpu *vgpu = workload->vgpu; @@ -2678,9 +3012,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) int ret = 0; void *map; - obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv, - roundup(ctx_size + CACHELINE_BYTES, - PAGE_SIZE)); + obj = i915_gem_object_create_shmem(workload->engine->i915, + roundup(ctx_size + CACHELINE_BYTES, + PAGE_SIZE)); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -2692,7 +3026,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) goto put_obj; } + i915_gem_object_lock(obj, NULL); ret = i915_gem_object_set_to_cpu_domain(obj, false); + i915_gem_object_unlock(obj); if (ret) { gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n"); goto unmap_src; @@ -2714,15 +3050,18 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) unmap_src: i915_gem_object_unpin_map(obj); put_obj: - i915_gem_object_put(wa_ctx->indirect_ctx.obj); + i915_gem_object_put(obj); return ret; } static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; + u32 per_ctx_start[CACHELINE_DWORDS] = {}; unsigned char *bb_start_sva; + if (!wa_ctx->per_ctx.valid) + return 0; + per_ctx_start[0] = 0x18800001; per_ctx_start[1] = wa_ctx->per_ctx.guest_gma; @@ -2762,30 +3101,126 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } -static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, - unsigned int opcode, int rings) +/* generate dummy contexts by sending empty requests to HW, and let + * the HW to fill Engine Contexts. This dummy contexts are used for + * initialization purpose (update reg whitelist), so referred to as + * init context here + */ +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) { - struct cmd_info *info = NULL; - unsigned int ring; + const unsigned long start = LRC_STATE_PN * PAGE_SIZE; + struct intel_gvt *gvt = vgpu->gvt; + struct intel_engine_cs *engine; + enum intel_engine_id id; - for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) { - info = find_cmd_entry(gvt, opcode, ring); - if (info) - break; + if (gvt->is_reg_whitelist_updated) + return; + + /* scan init ctx to update cmd accessible list */ + for_each_engine(engine, gvt->gt, id) { + struct parser_exec_state s; + void *vaddr; + int ret; + + if (!engine->default_state) + continue; + + vaddr = shmem_pin_map(engine->default_state); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); + return; + } + + s.buf_type = RING_BUFFER_CTX; + s.buf_addr_type = GTT_BUFFER; + s.vgpu = vgpu; + s.engine = engine; + s.ring_start = 0; + s.ring_size = engine->context_size - start; + s.ring_head = 0; + s.ring_tail = s.ring_size; + s.rb_va = vaddr + start; + s.workload = NULL; + s.is_ctx_wa = false; + s.is_init_ctx = true; + + /* skipping the first RING_CTX_SIZE(0x50) dwords */ + ret = ip_gma_set(&s, RING_CTX_SIZE); + if (ret == 0) { + ret = command_scan(&s, 0, s.ring_size, 0, s.ring_size); + if (ret) + gvt_err("Scan init ctx error\n"); + } + + shmem_unpin_map(engine->default_state, vaddr); + if (ret) + return; } - return info; + + gvt->is_reg_whitelist_updated = true; +} + +int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + unsigned long gma_head, gma_tail, gma_start, ctx_size; + struct parser_exec_state s; + int ring_id = workload->engine->id; + struct intel_context *ce = vgpu->submission.shadow[ring_id]; + int ret; + + GEM_BUG_ON(atomic_read(&ce->pin_count) < 0); + + ctx_size = workload->engine->context_size - PAGE_SIZE; + + /* Only ring contxt is loaded to HW for inhibit context, no need to + * scan engine context + */ + if (is_inhibit_context(ce)) + return 0; + + gma_start = i915_ggtt_offset(ce->state) + LRC_STATE_PN*PAGE_SIZE; + gma_head = 0; + gma_tail = ctx_size; + + s.buf_type = RING_BUFFER_CTX; + s.buf_addr_type = GTT_BUFFER; + s.vgpu = workload->vgpu; + s.engine = workload->engine; + s.ring_start = gma_start; + s.ring_size = ctx_size; + s.ring_head = gma_start + gma_head; + s.ring_tail = gma_start + gma_tail; + s.rb_va = ce->lrc_reg_state; + s.workload = workload; + s.is_ctx_wa = false; + s.is_init_ctx = false; + + /* don't scan the first RING_CTX_SIZE(0x50) dwords, as it's ring + * context + */ + ret = ip_gma_set(&s, gma_start + gma_head + RING_CTX_SIZE); + if (ret) + goto out; + + ret = command_scan(&s, gma_head, gma_tail, + gma_start, ctx_size); +out: + if (ret) + gvt_vgpu_err("scan shadow ctx error\n"); + + return ret; } static int init_cmd_table(struct intel_gvt *gvt) { + unsigned int gen_type = intel_gvt_get_device_type(gvt); int i; - struct cmd_entry *e; - struct cmd_info *info; - unsigned int gen_type; - - gen_type = intel_gvt_get_device_type(gvt); for (i = 0; i < ARRAY_SIZE(cmd_info); i++) { + struct cmd_entry *e; + if (!(cmd_info[i].devices & gen_type)) continue; @@ -2794,20 +3229,16 @@ static int init_cmd_table(struct intel_gvt *gvt) return -ENOMEM; e->info = &cmd_info[i]; - info = find_cmd_entry_any_ring(gvt, - e->info->opcode, e->info->rings); - if (info) { - gvt_err("%s %s duplicated\n", e->info->name, - info->name); - return -EEXIST; - } + if (cmd_info[i].opcode == OP_MI_NOOP) + mi_noop_index = i; INIT_HLIST_NODE(&e->hlist); add_cmd_entry(gvt, e); gvt_dbg_cmd("add %-30s op %04x flag %x devs %02x rings %02x\n", - e->info->name, e->info->opcode, e->info->flag, - e->info->devices, e->info->rings); + e->info->name, e->info->opcode, e->info->flag, + e->info->devices, e->info->rings); } + return 0; } |
