summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gvt/cmd_parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/cmd_parser.c')
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c1133
1 files changed, 782 insertions, 351 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 713848c36349..df04e4ead8ea 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -35,11 +35,30 @@
*/
#include <linux/slab.h>
+
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
+#include "i915_reg.h"
+#include "display/intel_display_regs.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc.h"
+#include "gt/intel_ring.h"
+#include "gt/intel_gt_requests.h"
+#include "gt/shmem_utils.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"
+#include "display/i9xx_plane_regs.h"
+#include "display/intel_display_core.h"
+#include "display/intel_sprite_regs.h"
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
#define INVALID_OP (~0U)
#define OP_LEN_MI 9
@@ -55,10 +74,10 @@ struct sub_op_bits {
int low;
};
struct decode_info {
- char *name;
+ const char *name;
int op_len;
int nr_sub_op;
- struct sub_op_bits *sub_op;
+ const struct sub_op_bits *sub_op;
};
#define MAX_CMD_BUDGET 0x7fffffff
@@ -162,6 +181,7 @@ struct decode_info {
#define OP_STATE_BASE_ADDRESS OP_3D_MEDIA(0x0, 0x1, 0x01)
#define OP_STATE_SIP OP_3D_MEDIA(0x0, 0x1, 0x02)
#define OP_3D_MEDIA_0_1_4 OP_3D_MEDIA(0x0, 0x1, 0x04)
+#define OP_SWTESS_BASE_ADDRESS OP_3D_MEDIA(0x0, 0x1, 0x03)
#define OP_3DSTATE_VF_STATISTICS_GM45 OP_3D_MEDIA(0x1, 0x0, 0x0B)
@@ -172,6 +192,7 @@ struct decode_info {
#define OP_MEDIA_INTERFACE_DESCRIPTOR_LOAD OP_3D_MEDIA(0x2, 0x0, 0x2)
#define OP_MEDIA_GATEWAY_STATE OP_3D_MEDIA(0x2, 0x0, 0x3)
#define OP_MEDIA_STATE_FLUSH OP_3D_MEDIA(0x2, 0x0, 0x4)
+#define OP_MEDIA_POOL_STATE OP_3D_MEDIA(0x2, 0x0, 0x5)
#define OP_MEDIA_OBJECT OP_3D_MEDIA(0x2, 0x1, 0x0)
#define OP_MEDIA_OBJECT_PRT OP_3D_MEDIA(0x2, 0x1, 0x2)
@@ -373,35 +394,51 @@ typedef int (*parser_cmd_handler)(struct parser_exec_state *s);
#define ADDR_FIX_4(x1, x2, x3, x4) (ADDR_FIX_1(x1) | ADDR_FIX_3(x2, x3, x4))
#define ADDR_FIX_5(x1, x2, x3, x4, x5) (ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5))
+#define DWORD_FIELD(dword, end, start) \
+ FIELD_GET(GENMASK(end, start), cmd_val(s, dword))
+
+#define OP_LENGTH_BIAS 2
+#define CMD_LEN(value) (value + OP_LENGTH_BIAS)
+
+static int gvt_check_valid_cmd_length(int len, int valid_len)
+{
+ if (valid_len != len) {
+ gvt_err("len is not valid: len=%u valid_len=%u\n",
+ len, valid_len);
+ return -EFAULT;
+ }
+ return 0;
+}
+
struct cmd_info {
- char *name;
+ const char *name;
u32 opcode;
-#define F_LEN_MASK (1U<<0)
+#define F_LEN_MASK 3U
#define F_LEN_CONST 1U
#define F_LEN_VAR 0U
+/* value is const although LEN maybe variable */
+#define F_LEN_VAR_FIXED (1<<1)
/*
* command has its own ip advance logic
* e.g. MI_BATCH_START, MI_BATCH_END
*/
-#define F_IP_ADVANCE_CUSTOM (1<<1)
-
-#define F_POST_HANDLE (1<<2)
+#define F_IP_ADVANCE_CUSTOM (1<<2)
u32 flag;
-#define R_RCS (1 << RCS)
-#define R_VCS1 (1 << VCS)
-#define R_VCS2 (1 << VCS2)
+#define R_RCS BIT(RCS0)
+#define R_VCS1 BIT(VCS0)
+#define R_VCS2 BIT(VCS1)
#define R_VCS (R_VCS1 | R_VCS2)
-#define R_BCS (1 << BCS)
-#define R_VECS (1 << VECS)
+#define R_BCS BIT(BCS0)
+#define R_VECS BIT(VECS0)
#define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS)
/* rings that support this cmd: BLT/RCS/VCS/VECS */
- uint16_t rings;
+ intel_engine_mask_t rings;
/* devices that support this cmd: SNB/IVB/HSW/... */
- uint16_t devices;
+ u16 devices;
/* which DWords are address that need fix up.
* bit 0 means a 32-bit non address operand in command
@@ -411,26 +448,30 @@ struct cmd_info {
* No matter the address length, each address only takes
* one bit in the bitmap.
*/
- uint16_t addr_bitmap;
+ u16 addr_bitmap;
/* flag == F_LEN_CONST : command length
* flag == F_LEN_VAR : length bias bits
* Note: length is in DWord
*/
- uint8_t len;
+ u32 len;
parser_cmd_handler handler;
+
+ /* valid length in DWord */
+ u32 valid_len;
};
struct cmd_entry {
struct hlist_node hlist;
- struct cmd_info *info;
+ const struct cmd_info *info;
};
enum {
RING_BUFFER_INSTRUCTION,
BATCH_BUFFER_INSTRUCTION,
BATCH_BUFFER_2ND_LEVEL,
+ RING_BUFFER_CTX,
};
enum {
@@ -440,7 +481,7 @@ enum {
struct parser_exec_state {
struct intel_vgpu *vgpu;
- int ring_id;
+ const struct intel_engine_cs *engine;
int buf_type;
@@ -471,8 +512,10 @@ struct parser_exec_state {
* used when ret from 2nd level batch buffer
*/
int saved_buf_addr_type;
+ bool is_ctx_wa;
+ bool is_init_ctx;
- struct cmd_info *info;
+ const struct cmd_info *info;
struct intel_vgpu_workload *workload;
};
@@ -483,12 +526,12 @@ struct parser_exec_state {
static unsigned long bypass_scan_mask = 0;
/* ring ALL, type = 0 */
-static struct sub_op_bits sub_op_mi[] = {
+static const struct sub_op_bits sub_op_mi[] = {
{31, 29},
{28, 23},
};
-static struct decode_info decode_info_mi = {
+static const struct decode_info decode_info_mi = {
"MI",
OP_LEN_MI,
ARRAY_SIZE(sub_op_mi),
@@ -496,12 +539,12 @@ static struct decode_info decode_info_mi = {
};
/* ring RCS, command type 2 */
-static struct sub_op_bits sub_op_2d[] = {
+static const struct sub_op_bits sub_op_2d[] = {
{31, 29},
{28, 22},
};
-static struct decode_info decode_info_2d = {
+static const struct decode_info decode_info_2d = {
"2D",
OP_LEN_2D,
ARRAY_SIZE(sub_op_2d),
@@ -509,14 +552,14 @@ static struct decode_info decode_info_2d = {
};
/* ring RCS, command type 3 */
-static struct sub_op_bits sub_op_3d_media[] = {
+static const struct sub_op_bits sub_op_3d_media[] = {
{31, 29},
{28, 27},
{26, 24},
{23, 16},
};
-static struct decode_info decode_info_3d_media = {
+static const struct decode_info decode_info_3d_media = {
"3D_Media",
OP_LEN_3D_MEDIA,
ARRAY_SIZE(sub_op_3d_media),
@@ -524,7 +567,7 @@ static struct decode_info decode_info_3d_media = {
};
/* ring VCS, command type 3 */
-static struct sub_op_bits sub_op_mfx_vc[] = {
+static const struct sub_op_bits sub_op_mfx_vc[] = {
{31, 29},
{28, 27},
{26, 24},
@@ -532,7 +575,7 @@ static struct sub_op_bits sub_op_mfx_vc[] = {
{20, 16},
};
-static struct decode_info decode_info_mfx_vc = {
+static const struct decode_info decode_info_mfx_vc = {
"MFX_VC",
OP_LEN_MFX_VC,
ARRAY_SIZE(sub_op_mfx_vc),
@@ -540,7 +583,7 @@ static struct decode_info decode_info_mfx_vc = {
};
/* ring VECS, command type 3 */
-static struct sub_op_bits sub_op_vebox[] = {
+static const struct sub_op_bits sub_op_vebox[] = {
{31, 29},
{28, 27},
{26, 24},
@@ -548,15 +591,15 @@ static struct sub_op_bits sub_op_vebox[] = {
{20, 16},
};
-static struct decode_info decode_info_vebox = {
+static const struct decode_info decode_info_vebox = {
"VEBOX",
OP_LEN_VEBOX,
ARRAY_SIZE(sub_op_vebox),
sub_op_vebox,
};
-static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
- [RCS] = {
+static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
+ [RCS0] = {
&decode_info_mi,
NULL,
NULL,
@@ -567,7 +610,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
- [VCS] = {
+ [VCS0] = {
&decode_info_mi,
NULL,
NULL,
@@ -578,7 +621,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
- [BCS] = {
+ [BCS0] = {
&decode_info_mi,
NULL,
&decode_info_2d,
@@ -589,7 +632,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
- [VECS] = {
+ [VECS0] = {
&decode_info_mi,
NULL,
NULL,
@@ -600,7 +643,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
NULL,
},
- [VCS2] = {
+ [VCS1] = {
&decode_info_mi,
NULL,
NULL,
@@ -612,40 +655,42 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
},
};
-static inline u32 get_opcode(u32 cmd, int ring_id)
+static inline u32 get_opcode(u32 cmd, const struct intel_engine_cs *engine)
{
- struct decode_info *d_info;
+ const struct decode_info *d_info;
- d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
+ d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)];
if (d_info == NULL)
return INVALID_OP;
return cmd >> (32 - d_info->op_len);
}
-static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt,
- unsigned int opcode, int ring_id)
+static inline const struct cmd_info *
+find_cmd_entry(struct intel_gvt *gvt, unsigned int opcode,
+ const struct intel_engine_cs *engine)
{
struct cmd_entry *e;
hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) {
- if ((opcode == e->info->opcode) &&
- (e->info->rings & (1 << ring_id)))
+ if (opcode == e->info->opcode &&
+ e->info->rings & engine->mask)
return e->info;
}
return NULL;
}
-static inline struct cmd_info *get_cmd_info(struct intel_gvt *gvt,
- u32 cmd, int ring_id)
+static inline const struct cmd_info *
+get_cmd_info(struct intel_gvt *gvt, u32 cmd,
+ const struct intel_engine_cs *engine)
{
u32 opcode;
- opcode = get_opcode(cmd, ring_id);
+ opcode = get_opcode(cmd, engine);
if (opcode == INVALID_OP)
return NULL;
- return find_cmd_entry(gvt, opcode, ring_id);
+ return find_cmd_entry(gvt, opcode, engine);
}
static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low)
@@ -653,12 +698,12 @@ static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low)
return (cmd >> low) & ((1U << (hi - low + 1)) - 1);
}
-static inline void print_opcode(u32 cmd, int ring_id)
+static inline void print_opcode(u32 cmd, const struct intel_engine_cs *engine)
{
- struct decode_info *d_info;
+ const struct decode_info *d_info;
int i;
- d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
+ d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)];
if (d_info == NULL)
return;
@@ -682,19 +727,26 @@ static inline u32 cmd_val(struct parser_exec_state *s, int index)
return *cmd_ptr(s, index);
}
+static inline bool is_init_ctx(struct parser_exec_state *s)
+{
+ return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx);
+}
+
static void parser_exec_state_dump(struct parser_exec_state *s)
{
int cnt = 0;
int i;
- gvt_dbg_cmd(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)"
- " ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id,
- s->ring_id, s->ring_start, s->ring_start + s->ring_size,
- s->ring_head, s->ring_tail);
+ gvt_dbg_cmd(" vgpu%d RING%s: ring_start(%08lx) ring_end(%08lx)"
+ " ring_head(%08lx) ring_tail(%08lx)\n",
+ s->vgpu->id, s->engine->name,
+ s->ring_start, s->ring_start + s->ring_size,
+ s->ring_head, s->ring_tail);
gvt_dbg_cmd(" %s %s ip_gma(%08lx) ",
s->buf_type == RING_BUFFER_INSTRUCTION ?
- "RING_BUFFER" : "BATCH_BUFFER",
+ "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ?
+ "CTX_BUFFER" : "BATCH_BUFFER"),
s->buf_addr_type == GTT_BUFFER ?
"GTT" : "PPGTT", s->ip_gma);
@@ -707,20 +759,15 @@ static void parser_exec_state_dump(struct parser_exec_state *s)
s->ip_va, cmd_val(s, 0), cmd_val(s, 1),
cmd_val(s, 2), cmd_val(s, 3));
- print_opcode(cmd_val(s, 0), s->ring_id);
-
- /* print the whole page to trace */
- pr_err(" ip_va=%p: %08x %08x %08x %08x\n",
- s->ip_va, cmd_val(s, 0), cmd_val(s, 1),
- cmd_val(s, 2), cmd_val(s, 3));
+ print_opcode(cmd_val(s, 0), s->engine);
s->ip_va = (u32 *)((((u64)s->ip_va) >> 12) << 12);
while (cnt < 1024) {
- pr_err("ip_va=%p: ", s->ip_va);
+ gvt_dbg_cmd("ip_va=%p: ", s->ip_va);
for (i = 0; i < 8; i++)
- pr_err("%08x ", cmd_val(s, i));
- pr_err("\n");
+ gvt_dbg_cmd("%08x ", cmd_val(s, i));
+ gvt_dbg_cmd("\n");
s->ip_va += 8 * sizeof(u32);
cnt += 8;
@@ -734,7 +781,8 @@ static inline void update_ip_va(struct parser_exec_state *s)
if (WARN_ON(s->ring_head == s->ring_tail))
return;
- if (s->buf_type == RING_BUFFER_INSTRUCTION) {
+ if (s->buf_type == RING_BUFFER_INSTRUCTION ||
+ s->buf_type == RING_BUFFER_CTX) {
unsigned long ring_top = s->ring_start + s->ring_size;
if (s->ring_head > s->ring_tail) {
@@ -779,7 +827,7 @@ static inline int ip_gma_advance(struct parser_exec_state *s,
return 0;
}
-static inline int get_cmd_length(struct cmd_info *info, u32 cmd)
+static inline int get_cmd_length(const struct cmd_info *info, u32 cmd)
{
if ((info->flag & F_LEN_MASK) == F_LEN_CONST)
return info->len;
@@ -798,33 +846,48 @@ static inline int cmd_length(struct parser_exec_state *s)
*addr = val; \
} while (0)
-static bool is_shadowed_mmio(unsigned int offset)
+static inline bool is_mocs_mmio(unsigned int offset)
{
- bool ret = false;
-
- if ((offset == 0x2168) || /*BB current head register UDW */
- (offset == 0x2140) || /*BB current header register */
- (offset == 0x211c) || /*second BB header register UDW */
- (offset == 0x2114)) { /*second BB header register UDW */
- ret = true;
- }
- return ret;
+ return ((offset >= 0xc800) && (offset <= 0xcff8)) ||
+ ((offset >= 0xb020) && (offset <= 0xb0a0));
}
-static inline bool is_force_nonpriv_mmio(unsigned int offset)
+static int is_cmd_update_pdps(unsigned int offset,
+ struct parser_exec_state *s)
{
- return (offset >= 0x24d0 && offset < 0x2500);
+ u32 base = s->workload->engine->mmio_base;
+ return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0));
}
-static int force_nonpriv_reg_handler(struct parser_exec_state *s,
- unsigned int offset, unsigned int index)
+static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s,
+ unsigned int offset, unsigned int index)
{
- struct intel_gvt *gvt = s->vgpu->gvt;
- unsigned int data = cmd_val(s, index + 1);
-
- if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) {
- gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n",
- offset, data);
+ struct intel_vgpu *vgpu = s->vgpu;
+ struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm;
+ struct intel_vgpu_mm *mm;
+ u64 pdps[GEN8_3LVL_PDPES];
+
+ if (shadow_mm->ppgtt_mm.root_entry_type ==
+ GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+ pdps[0] = (u64)cmd_val(s, 2) << 32;
+ pdps[0] |= cmd_val(s, 4);
+
+ mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+ if (!mm) {
+ gvt_vgpu_err("failed to get the 4-level shadow vm\n");
+ return -EINVAL;
+ }
+ intel_vgpu_mm_get(mm);
+ list_add_tail(&mm->ppgtt_mm.link,
+ &s->workload->lri_shadow_mm);
+ *cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+ *cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+ } else {
+ /* Currently all guests use PML4 table and now can't
+ * have a guest with 3-level table but uses LRI for
+ * PPGTT update. So this is simply un-testable. */
+ GEM_BUG_ON(1);
+ gvt_vgpu_err("invalid shared shadow vm type\n");
return -EINVAL;
}
return 0;
@@ -835,26 +898,68 @@ static int cmd_reg_handler(struct parser_exec_state *s,
{
struct intel_vgpu *vgpu = s->vgpu;
struct intel_gvt *gvt = vgpu->gvt;
+ u32 ctx_sr_ctl;
+ u32 *vreg, vreg_old;
if (offset + 4 > gvt->device_info.mmio_size) {
gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
cmd, offset);
- return -EINVAL;
+ return -EFAULT;
}
- if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) {
+ if (is_init_ctx(s)) {
+ struct intel_gvt_mmio_info *mmio_info;
+
+ intel_gvt_mmio_set_cmd_accessible(gvt, offset);
+ mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+ if (mmio_info && mmio_info->write)
+ intel_gvt_mmio_set_cmd_write_patch(gvt, offset);
+ return 0;
+ }
+
+ if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
gvt_vgpu_err("%s access to non-render register (%x)\n",
cmd, offset);
- return 0;
+ return -EBADRQC;
}
- if (is_shadowed_mmio(offset)) {
- gvt_vgpu_err("found access of shadowed MMIO %x\n", offset);
+ if (!strncmp(cmd, "srm", 3) ||
+ !strncmp(cmd, "lrm", 3)) {
+ if (offset == i915_mmio_reg_offset(GEN8_L3SQCREG4) ||
+ offset == 0x21f0 ||
+ (IS_BROADWELL(gvt->gt->i915) &&
+ offset == i915_mmio_reg_offset(INSTPM)))
+ return 0;
+ else {
+ gvt_vgpu_err("%s access to register (%x)\n",
+ cmd, offset);
+ return -EPERM;
+ }
+ }
+
+ if (!strncmp(cmd, "lrr-src", 7) ||
+ !strncmp(cmd, "lrr-dst", 7)) {
+ if (IS_BROADWELL(gvt->gt->i915) && offset == 0x215c)
+ return 0;
+ else {
+ gvt_vgpu_err("not allowed cmd %s reg (%x)\n", cmd, offset);
+ return -EPERM;
+ }
+ }
+
+ if (!strncmp(cmd, "pipe_ctrl", 9)) {
+ /* TODO: add LRI POST logic here */
return 0;
}
- if (is_force_nonpriv_mmio(offset) &&
- force_nonpriv_reg_handler(s, offset, index))
+ if (strncmp(cmd, "lri", 3))
+ return -EPERM;
+
+ /* below are all lri handlers */
+ vreg = &vgpu_vreg(s->vgpu, offset);
+
+ if (is_cmd_update_pdps(offset, s) &&
+ cmd_pdp_mmio_update_handler(s, offset, index))
return -EINVAL;
if (offset == i915_mmio_reg_offset(DERRMR) ||
@@ -863,8 +968,70 @@ static int cmd_reg_handler(struct parser_exec_state *s,
patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE);
}
- /* TODO: Update the global mask if this MMIO is a masked-MMIO */
- intel_gvt_mmio_set_cmd_accessed(gvt, offset);
+ if (is_mocs_mmio(offset))
+ *vreg = cmd_val(s, index + 1);
+
+ vreg_old = *vreg;
+
+ if (intel_gvt_mmio_is_cmd_write_patch(gvt, offset)) {
+ u32 cmdval_new, cmdval;
+ struct intel_gvt_mmio_info *mmio_info;
+
+ cmdval = cmd_val(s, index + 1);
+
+ mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+ if (!mmio_info) {
+ cmdval_new = cmdval;
+ } else {
+ u64 ro_mask = mmio_info->ro_mask;
+ int ret;
+
+ if (likely(!ro_mask))
+ ret = mmio_info->write(s->vgpu, offset,
+ &cmdval, 4);
+ else {
+ gvt_vgpu_err("try to write RO reg %x\n",
+ offset);
+ ret = -EBADRQC;
+ }
+ if (ret)
+ return ret;
+ cmdval_new = *vreg;
+ }
+ if (cmdval_new != cmdval)
+ patch_value(s, cmd_ptr(s, index+1), cmdval_new);
+ }
+
+ /* only patch cmd. restore vreg value if changed in mmio write handler*/
+ *vreg = vreg_old;
+
+ /* TODO
+ * In order to let workload with inhibit context to generate
+ * correct image data into memory, vregs values will be loaded to
+ * hw via LRIs in the workload with inhibit context. But as
+ * indirect context is loaded prior to LRIs in workload, we don't
+ * want reg values specified in indirect context overwritten by
+ * LRIs in workloads. So, when scanning an indirect context, we
+ * update reg values in it into vregs, so LRIs in workload with
+ * inhibit context will restore with correct values
+ */
+ if (GRAPHICS_VER(s->engine->i915) == 9 &&
+ intel_gvt_mmio_is_sr_in_ctx(gvt, offset) &&
+ !strncmp(cmd, "lri", 3)) {
+ intel_gvt_read_gpa(s->vgpu,
+ s->workload->ring_context_gpa + 12, &ctx_sr_ctl, 4);
+ /* check inhibit context */
+ if (ctx_sr_ctl & 1) {
+ u32 data = cmd_val(s, index + 1);
+
+ if (intel_gvt_mmio_has_mode_mask(s->vgpu->gvt, offset))
+ intel_vgpu_mask_mmio_write(vgpu,
+ offset, &data, 4);
+ else
+ vgpu_vreg(vgpu, offset) = data;
+ }
+ }
+
return 0;
}
@@ -884,21 +1051,20 @@ static int cmd_handler_lri(struct parser_exec_state *s)
{
int i, ret = 0;
int cmd_len = cmd_length(s);
- struct intel_gvt *gvt = s->vgpu->gvt;
for (i = 1; i < cmd_len; i += 2) {
- if (IS_BROADWELL(gvt->dev_priv) &&
- (s->ring_id != RCS)) {
- if (s->ring_id == BCS &&
- cmd_reg(s, i) ==
- i915_mmio_reg_offset(DERRMR))
+ if (IS_BROADWELL(s->engine->i915) && s->engine->id != RCS0) {
+ if (s->engine->id == BCS0 &&
+ cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR))
ret |= 0;
else
- ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
+ ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0;
}
if (ret)
break;
ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
+ if (ret)
+ break;
}
return ret;
}
@@ -909,14 +1075,18 @@ static int cmd_handler_lrr(struct parser_exec_state *s)
int cmd_len = cmd_length(s);
for (i = 1; i < cmd_len; i += 2) {
- if (IS_BROADWELL(s->vgpu->gvt->dev_priv))
+ if (IS_BROADWELL(s->engine->i915))
ret |= ((cmd_reg_inhibit(s, i) ||
- (cmd_reg_inhibit(s, i + 1)))) ?
- -EINVAL : 0;
+ (cmd_reg_inhibit(s, i + 1)))) ?
+ -EBADRQC : 0;
if (ret)
break;
ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
+ if (ret)
+ break;
ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
+ if (ret)
+ break;
}
return ret;
}
@@ -933,16 +1103,20 @@ static int cmd_handler_lrm(struct parser_exec_state *s)
int cmd_len = cmd_length(s);
for (i = 1; i < cmd_len;) {
- if (IS_BROADWELL(gvt->dev_priv))
- ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
+ if (IS_BROADWELL(s->engine->i915))
+ ret |= (cmd_reg_inhibit(s, i)) ? -EBADRQC : 0;
if (ret)
break;
ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
+ if (ret)
+ break;
if (cmd_val(s, 0) & (1 << 22)) {
gma = cmd_gma(s, i + 1);
if (gmadr_bytes == 8)
gma |= (cmd_gma_hi(s, i + 2)) << 32;
ret |= cmd_address_audit(s, gma, sizeof(u32), false);
+ if (ret)
+ break;
}
i += gmadr_dw_number(s) + 1;
}
@@ -958,11 +1132,15 @@ static int cmd_handler_srm(struct parser_exec_state *s)
for (i = 1; i < cmd_len;) {
ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "srm");
+ if (ret)
+ break;
if (cmd_val(s, 0) & (1 << 22)) {
gma = cmd_gma(s, i + 1);
if (gmadr_bytes == 8)
gma |= (cmd_gma_hi(s, i + 2)) << 32;
ret |= cmd_address_audit(s, gma, sizeof(u32), false);
+ if (ret)
+ break;
}
i += gmadr_dw_number(s) + 1;
}
@@ -975,28 +1153,28 @@ struct cmd_interrupt_event {
int mi_user_interrupt;
};
-static struct cmd_interrupt_event cmd_interrupt_events[] = {
- [RCS] = {
+static const struct cmd_interrupt_event cmd_interrupt_events[] = {
+ [RCS0] = {
.pipe_control_notify = RCS_PIPE_CONTROL,
.mi_flush_dw = INTEL_GVT_EVENT_RESERVED,
.mi_user_interrupt = RCS_MI_USER_INTERRUPT,
},
- [BCS] = {
+ [BCS0] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = BCS_MI_FLUSH_DW,
.mi_user_interrupt = BCS_MI_USER_INTERRUPT,
},
- [VCS] = {
+ [VCS0] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = VCS_MI_FLUSH_DW,
.mi_user_interrupt = VCS_MI_USER_INTERRUPT,
},
- [VCS2] = {
+ [VCS1] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = VCS2_MI_FLUSH_DW,
.mi_user_interrupt = VCS2_MI_USER_INTERRUPT,
},
- [VECS] = {
+ [VECS0] = {
.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
.mi_flush_dw = VECS_MI_FLUSH_DW,
.mi_user_interrupt = VECS_MI_USER_INTERRUPT,
@@ -1010,6 +1188,7 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s)
bool index_mode = false;
unsigned int post_sync;
int ret = 0;
+ u32 hws_pga, val;
post_sync = (cmd_val(s, 1) & PIPE_CONTROL_POST_SYNC_OP_MASK) >> 14;
@@ -1033,6 +1212,15 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s)
index_mode = true;
ret |= cmd_address_audit(s, gma, sizeof(u64),
index_mode);
+ if (ret)
+ return ret;
+ if (index_mode) {
+ hws_pga = s->vgpu->hws_pga[s->engine->id];
+ gma = hws_pga + gma;
+ patch_value(s, cmd_ptr(s, 2), gma);
+ val = cmd_val(s, 1) & (~(1 << 21));
+ patch_value(s, cmd_ptr(s, 1), val);
+ }
}
}
}
@@ -1041,15 +1229,16 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s)
return ret;
if (cmd_val(s, 1) & PIPE_CONTROL_NOTIFY)
- set_bit(cmd_interrupt_events[s->ring_id].pipe_control_notify,
- s->workload->pending_events);
+ set_bit(cmd_interrupt_events[s->engine->id].pipe_control_notify,
+ s->workload->pending_events);
return 0;
}
static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s)
{
- set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt,
- s->workload->pending_events);
+ set_bit(cmd_interrupt_events[s->engine->id].mi_user_interrupt,
+ s->workload->pending_events);
+ patch_value(s, cmd_ptr(s, 0), MI_NOOP);
return 0;
}
@@ -1066,6 +1255,8 @@ static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s)
s->buf_type = BATCH_BUFFER_INSTRUCTION;
ret = ip_gma_set(s, s->ret_ip_gma_bb);
s->buf_addr_type = s->saved_buf_addr_type;
+ } else if (s->buf_type == RING_BUFFER_CTX) {
+ ret = ip_gma_set(s, s->ring_tail);
} else {
s->buf_type = RING_BUFFER_INSTRUCTION;
s->buf_addr_type = GTT_BUFFER;
@@ -1098,7 +1289,8 @@ struct plane_code_mapping {
static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+ struct drm_i915_private *dev_priv = s->engine->i915;
+ struct intel_display *display = dev_priv->display;
struct plane_code_mapping gen8_plane_code[] = {
[0] = {PIPE_A, PLANE_A, PRIMARY_A_FLIP_DONE},
[1] = {PIPE_B, PLANE_A, PRIMARY_B_FLIP_DONE},
@@ -1115,8 +1307,8 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
dword2 = cmd_val(s, 2);
v = (dword0 & GENMASK(21, 19)) >> 19;
- if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
- return -EINVAL;
+ if (drm_WARN_ON(&dev_priv->drm, v >= ARRAY_SIZE(gen8_plane_code)))
+ return -EBADRQC;
info->pipe = gen8_plane_code[v].pipe;
info->plane = gen8_plane_code[v].plane;
@@ -1127,16 +1319,16 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1);
if (info->plane == PLANE_A) {
- info->ctrl_reg = DSPCNTR(info->pipe);
- info->stride_reg = DSPSTRIDE(info->pipe);
- info->surf_reg = DSPSURF(info->pipe);
+ info->ctrl_reg = DSPCNTR(display, info->pipe);
+ info->stride_reg = DSPSTRIDE(display, info->pipe);
+ info->surf_reg = DSPSURF(display, info->pipe);
} else if (info->plane == PLANE_B) {
info->ctrl_reg = SPRCTL(info->pipe);
info->stride_reg = SPRSTRIDE(info->pipe);
info->surf_reg = SPRSURF(info->pipe);
} else {
- WARN_ON(1);
- return -EINVAL;
+ drm_WARN_ON(&dev_priv->drm, 1);
+ return -EBADRQC;
}
return 0;
}
@@ -1144,7 +1336,8 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
static int skl_decode_mi_display_flip(struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+ struct drm_i915_private *dev_priv = s->engine->i915;
+ struct intel_display *display = dev_priv->display;
struct intel_vgpu *vgpu = s->vgpu;
u32 dword0 = cmd_val(s, 0);
u32 dword1 = cmd_val(s, 1);
@@ -1185,7 +1378,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
default:
gvt_vgpu_err("unknown plane code %d\n", plane);
- return -EINVAL;
+ return -EBADRQC;
}
info->stride_val = (dword1 & GENMASK(15, 6)) >> 6;
@@ -1193,9 +1386,9 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
info->surf_val = (dword2 & GENMASK(31, 12)) >> 12;
info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1);
- info->ctrl_reg = DSPCNTR(info->pipe);
- info->stride_reg = DSPSTRIDE(info->pipe);
- info->surf_reg = DSPSURF(info->pipe);
+ info->ctrl_reg = DSPCNTR(display, info->pipe);
+ info->stride_reg = DSPSTRIDE(display, info->pipe);
+ info->surf_reg = DSPSURF(display, info->pipe);
return 0;
}
@@ -1203,20 +1396,19 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
static int gen8_check_mi_display_flip(struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
u32 stride, tile;
if (!info->async_flip)
return 0;
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
- stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0);
- tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) &
+ if (GRAPHICS_VER(s->engine->i915) >= 9) {
+ stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0);
+ tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) &
GENMASK(12, 10)) >> 10;
} else {
- stride = (vgpu_vreg(s->vgpu, info->stride_reg) &
+ stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) &
GENMASK(15, 6)) >> 6;
- tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10;
+ tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10;
}
if (stride != info->stride_val)
@@ -1232,36 +1424,41 @@ static int gen8_update_plane_mmio_from_mi_display_flip(
struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+ struct drm_i915_private *dev_priv = s->engine->i915;
+ struct intel_display *display = dev_priv->display;
struct intel_vgpu *vgpu = s->vgpu;
- set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12),
info->surf_val << 12);
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
- set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0),
+ if (GRAPHICS_VER(dev_priv) >= 9) {
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0),
info->stride_val);
- set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10),
info->tile_val << 10);
} else {
- set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6),
info->stride_val << 6);
- set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10),
info->tile_val << 10);
}
- vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++;
- intel_vgpu_trigger_virtual_event(vgpu, info->event);
+ if (info->plane == PLANE_PRIMARY)
+ vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(display, info->pipe))++;
+
+ if (info->async_flip)
+ intel_vgpu_trigger_virtual_event(vgpu, info->event);
+ else
+ set_bit(info->event, vgpu->irq.flip_done_event[info->pipe]);
+
return 0;
}
static int decode_mi_display_flip(struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
-
- if (IS_BROADWELL(dev_priv))
+ if (IS_BROADWELL(s->engine->i915))
return gen8_decode_mi_display_flip(s, info);
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+ if (GRAPHICS_VER(s->engine->i915) >= 9)
return skl_decode_mi_display_flip(s, info);
return -ENODEV;
@@ -1270,26 +1467,14 @@ static int decode_mi_display_flip(struct parser_exec_state *s,
static int check_mi_display_flip(struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
-
- if (IS_BROADWELL(dev_priv)
- || IS_SKYLAKE(dev_priv)
- || IS_KABYLAKE(dev_priv))
- return gen8_check_mi_display_flip(s, info);
- return -ENODEV;
+ return gen8_check_mi_display_flip(s, info);
}
static int update_plane_mmio_from_mi_display_flip(
struct parser_exec_state *s,
struct mi_display_flip_command_info *info)
{
- struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
-
- if (IS_BROADWELL(dev_priv)
- || IS_SKYLAKE(dev_priv)
- || IS_KABYLAKE(dev_priv))
- return gen8_update_plane_mmio_from_mi_display_flip(s, info);
- return -ENODEV;
+ return gen8_update_plane_mmio_from_mi_display_flip(s, info);
}
static int cmd_handler_mi_display_flip(struct parser_exec_state *s)
@@ -1299,6 +1484,15 @@ static int cmd_handler_mi_display_flip(struct parser_exec_state *s)
int ret;
int i;
int len = cmd_length(s);
+ u32 valid_len = CMD_LEN(1);
+
+ /* Flip Type == Stereo 3D Flip */
+ if (DWORD_FIELD(2, 1, 0) == 2)
+ valid_len++;
+ ret = gvt_check_valid_cmd_length(cmd_length(s),
+ valid_len);
+ if (ret)
+ return ret;
ret = decode_mi_display_flip(s, &info);
if (ret) {
@@ -1348,10 +1542,13 @@ static unsigned long get_gma_bb_from_cmd(struct parser_exec_state *s, int index)
{
unsigned long addr;
unsigned long gma_high, gma_low;
- int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+ struct intel_vgpu *vgpu = s->vgpu;
+ int gmadr_bytes = vgpu->gvt->device_info.gmadr_bytes_in_cmd;
- if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8))
+ if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) {
+ gvt_vgpu_err("invalid gma bytes %d\n", gmadr_bytes);
return INTEL_GVT_INVALID_ADDR;
+ }
gma_low = cmd_val(s, index) & BATCH_BUFFER_ADDR_MASK;
if (gmadr_bytes == 4) {
@@ -1374,21 +1571,21 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
if (op_size > max_surface_size) {
gvt_vgpu_err("command address audit fail name %s\n",
s->info->name);
- return -EINVAL;
+ return -EFAULT;
}
if (index_mode) {
- if (guest_gma >= GTT_PAGE_SIZE / sizeof(u64)) {
- ret = -EINVAL;
+ if (guest_gma >= I915_GTT_PAGE_SIZE) {
+ ret = -EFAULT;
goto err;
}
- } else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) ||
- (!vgpu_gmadr_is_valid(s->vgpu,
- guest_gma + op_size - 1))) {
- ret = -EINVAL;
+ } else if (!intel_gvt_ggtt_validate_range(vgpu, guest_gma, op_size)) {
+ ret = -EFAULT;
goto err;
}
+
return 0;
+
err:
gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
s->info->name, guest_gma, op_size);
@@ -1415,12 +1612,21 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s)
int op_size = (cmd_length(s) - 3) * sizeof(u32);
int core_id = (cmd_val(s, 2) & (1 << 0)) ? 1 : 0;
unsigned long gma, gma_low, gma_high;
+ u32 valid_len = CMD_LEN(2);
int ret = 0;
/* check ppggt */
if (!(cmd_val(s, 0) & (1 << 22)))
return 0;
+ /* check if QWORD */
+ if (DWORD_FIELD(0, 21, 21))
+ valid_len++;
+ ret = gvt_check_valid_cmd_length(cmd_length(s),
+ valid_len);
+ if (ret)
+ return ret;
+
gma = cmd_val(s, 2) & GENMASK(31, 2);
if (gmadr_bytes == 8) {
@@ -1439,7 +1645,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s)
gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
- return -EINVAL;
+ return -EBADRQC;
}
static int cmd_handler_mi_semaphore_wait(struct parser_exec_state *s)
@@ -1463,11 +1669,20 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s)
int op_size = (1 << ((cmd_val(s, 0) & GENMASK(20, 19)) >> 19)) *
sizeof(u32);
unsigned long gma, gma_high;
+ u32 valid_len = CMD_LEN(1);
int ret = 0;
if (!(cmd_val(s, 0) & (1 << 22)))
return ret;
+ /* check inline data */
+ if (cmd_val(s, 0) & BIT(18))
+ valid_len = CMD_LEN(9);
+ ret = gvt_check_valid_cmd_length(cmd_length(s),
+ valid_len);
+ if (ret)
+ return ret;
+
gma = cmd_val(s, 1) & GENMASK(31, 2);
if (gmadr_bytes == 8) {
gma_high = cmd_val(s, 2) & GENMASK(15, 0);
@@ -1504,6 +1719,17 @@ static int cmd_handler_mi_flush_dw(struct parser_exec_state *s)
unsigned long gma;
bool index_mode = false;
int ret = 0;
+ u32 hws_pga, val;
+ u32 valid_len = CMD_LEN(2);
+
+ ret = gvt_check_valid_cmd_length(cmd_length(s),
+ valid_len);
+ if (ret) {
+ /* Check again for Qword */
+ ret = gvt_check_valid_cmd_length(cmd_length(s),
+ ++valid_len);
+ return ret;
+ }
/* Check post-sync and ppgtt bit */
if (((cmd_val(s, 0) >> 14) & 0x3) && (cmd_val(s, 1) & (1 << 2))) {
@@ -1514,11 +1740,20 @@ static int cmd_handler_mi_flush_dw(struct parser_exec_state *s)
if (cmd_val(s, 0) & (1 << 21))
index_mode = true;
ret = cmd_address_audit(s, gma, sizeof(u64), index_mode);
+ if (ret)
+ return ret;
+ if (index_mode) {
+ hws_pga = s->vgpu->hws_pga[s->engine->id];
+ gma = hws_pga + gma;
+ patch_value(s, cmd_ptr(s, 1), gma);
+ val = cmd_val(s, 0) & (~(1 << 21));
+ patch_value(s, cmd_ptr(s, 0), val);
+ }
}
/* Check notify bit */
if ((cmd_val(s, 0) & (1 << 8)))
- set_bit(cmd_interrupt_events[s->ring_id].mi_flush_dw,
- s->workload->pending_events);
+ set_bit(cmd_interrupt_events[s->engine->id].mi_flush_dw,
+ s->workload->pending_events);
return ret;
}
@@ -1545,12 +1780,12 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
return -EFAULT;
}
- offset = gma & (GTT_PAGE_SIZE - 1);
+ offset = gma & (I915_GTT_PAGE_SIZE - 1);
- copy_len = (end_gma - gma) >= (GTT_PAGE_SIZE - offset) ?
- GTT_PAGE_SIZE - offset : end_gma - gma;
+ copy_len = (end_gma - gma) >= (I915_GTT_PAGE_SIZE - offset) ?
+ I915_GTT_PAGE_SIZE - offset : end_gma - gma;
- intel_gvt_hypervisor_read_gpa(vgpu, gpa, va + len, copy_len);
+ intel_gvt_read_gpa(vgpu, gpa, va + len, copy_len);
len += copy_len;
gma += copy_len;
@@ -1565,138 +1800,201 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
*/
static int batch_buffer_needs_scan(struct parser_exec_state *s)
{
- struct intel_gvt *gvt = s->vgpu->gvt;
+ /* Decide privilege based on address space */
+ if (cmd_val(s, 0) & BIT(8) &&
+ !(s->vgpu->scan_nonprivbb & s->engine->mask))
+ return 0;
- if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
- || IS_KABYLAKE(gvt->dev_priv)) {
- /* BDW decides privilege based on address space */
- if (cmd_val(s, 0) & (1 << 8))
- return 0;
- }
return 1;
}
-static uint32_t find_bb_size(struct parser_exec_state *s)
+static const char *repr_addr_type(unsigned int type)
+{
+ return type == PPGTT_BUFFER ? "ppgtt" : "ggtt";
+}
+
+static int find_bb_size(struct parser_exec_state *s,
+ unsigned long *bb_size,
+ unsigned long *bb_end_cmd_offset)
{
unsigned long gma = 0;
- struct cmd_info *info;
- uint32_t bb_size = 0;
- uint32_t cmd_len = 0;
- bool met_bb_end = false;
+ const struct cmd_info *info;
+ u32 cmd_len = 0;
+ bool bb_end = false;
struct intel_vgpu *vgpu = s->vgpu;
u32 cmd;
+ struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+ s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
+
+ *bb_size = 0;
+ *bb_end_cmd_offset = 0;
/* get the start gm address of the batch buffer */
gma = get_gma_bb_from_cmd(s, 1);
- cmd = cmd_val(s, 0);
+ if (gma == INTEL_GVT_INVALID_ADDR)
+ return -EFAULT;
- info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
+ cmd = cmd_val(s, 0);
+ info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
if (info == NULL) {
- gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
- cmd, get_opcode(cmd, s->ring_id));
- return -EINVAL;
+ gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+ cmd, get_opcode(cmd, s->engine),
+ repr_addr_type(s->buf_addr_type),
+ s->engine->name, s->workload);
+ return -EBADRQC;
}
do {
- copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
- gma, gma + 4, &cmd);
- info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
+ if (copy_gma_to_hva(s->vgpu, mm,
+ gma, gma + 4, &cmd) < 0)
+ return -EFAULT;
+ info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
if (info == NULL) {
- gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
- cmd, get_opcode(cmd, s->ring_id));
- return -EINVAL;
+ gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+ cmd, get_opcode(cmd, s->engine),
+ repr_addr_type(s->buf_addr_type),
+ s->engine->name, s->workload);
+ return -EBADRQC;
}
if (info->opcode == OP_MI_BATCH_BUFFER_END) {
- met_bb_end = true;
+ bb_end = true;
} else if (info->opcode == OP_MI_BATCH_BUFFER_START) {
- if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) {
+ if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0)
/* chained batch buffer */
- met_bb_end = true;
- }
+ bb_end = true;
}
+
+ if (bb_end)
+ *bb_end_cmd_offset = *bb_size;
+
cmd_len = get_cmd_length(info, cmd) << 2;
- bb_size += cmd_len;
+ *bb_size += cmd_len;
gma += cmd_len;
+ } while (!bb_end);
+
+ return 0;
+}
+
+static int audit_bb_end(struct parser_exec_state *s, void *va)
+{
+ struct intel_vgpu *vgpu = s->vgpu;
+ u32 cmd = *(u32 *)va;
+ const struct cmd_info *info;
+
+ info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+ if (info == NULL) {
+ gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+ cmd, get_opcode(cmd, s->engine),
+ repr_addr_type(s->buf_addr_type),
+ s->engine->name, s->workload);
+ return -EBADRQC;
+ }
- } while (!met_bb_end);
+ if ((info->opcode == OP_MI_BATCH_BUFFER_END) ||
+ ((info->opcode == OP_MI_BATCH_BUFFER_START) &&
+ (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0)))
+ return 0;
- return bb_size;
+ return -EBADRQC;
}
static int perform_bb_shadow(struct parser_exec_state *s)
{
- struct intel_shadow_bb_entry *entry_obj;
struct intel_vgpu *vgpu = s->vgpu;
+ struct intel_vgpu_shadow_bb *bb;
unsigned long gma = 0;
- uint32_t bb_size;
- void *dst = NULL;
+ unsigned long bb_size;
+ unsigned long bb_end_cmd_offset;
int ret = 0;
+ struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+ s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
+ unsigned long start_offset = 0;
- /* get the start gm address of the batch buffer */
+ /* Get the start gm address of the batch buffer */
gma = get_gma_bb_from_cmd(s, 1);
+ if (gma == INTEL_GVT_INVALID_ADDR)
+ return -EFAULT;
- /* get the size of the batch buffer */
- bb_size = find_bb_size(s);
+ ret = find_bb_size(s, &bb_size, &bb_end_cmd_offset);
+ if (ret)
+ return ret;
- /* allocate shadow batch buffer */
- entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
- if (entry_obj == NULL)
+ bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+ if (!bb)
return -ENOMEM;
- entry_obj->obj =
- i915_gem_object_create(s->vgpu->gvt->dev_priv,
- roundup(bb_size, PAGE_SIZE));
- if (IS_ERR(entry_obj->obj)) {
- ret = PTR_ERR(entry_obj->obj);
- goto free_entry;
- }
- entry_obj->len = bb_size;
- INIT_LIST_HEAD(&entry_obj->list);
+ bb->ppgtt = s->buf_addr_type != GTT_BUFFER;
- dst = i915_gem_object_pin_map(entry_obj->obj, I915_MAP_WB);
- if (IS_ERR(dst)) {
- ret = PTR_ERR(dst);
- goto put_obj;
+ /*
+ * The start_offset stores the batch buffer's start gma's
+ * offset relative to page boundary. So for non-privileged batch
+ * buffer, the shadowed gem object holds exactly the same page
+ * layout as original gem object. This is for the convenience of
+ * replacing the whole non-privilged batch buffer page to this
+ * shadowed one in PPGTT at the same gma address. (This replacing
+ * action is not implemented yet now, but may be necessary in
+ * future).
+ * For prileged batch buffer, we just change start gma address to
+ * that of shadowed page.
+ */
+ if (bb->ppgtt)
+ start_offset = gma & ~I915_GTT_PAGE_MASK;
+
+ bb->obj = i915_gem_object_create_shmem(s->engine->i915,
+ round_up(bb_size + start_offset,
+ PAGE_SIZE));
+ if (IS_ERR(bb->obj)) {
+ ret = PTR_ERR(bb->obj);
+ goto err_free_bb;
}
- ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false);
- if (ret) {
- gvt_vgpu_err("failed to set shadow batch to CPU\n");
- goto unmap_src;
+ bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB);
+ if (IS_ERR(bb->va)) {
+ ret = PTR_ERR(bb->va);
+ goto err_free_obj;
}
- entry_obj->va = dst;
- entry_obj->bb_start_cmd_va = s->ip_va;
-
- /* copy batch buffer to shadow batch buffer*/
- ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+ ret = copy_gma_to_hva(s->vgpu, mm,
gma, gma + bb_size,
- dst);
+ bb->va + start_offset);
if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n");
- goto unmap_src;
+ ret = -EFAULT;
+ goto err_unmap;
}
- list_add(&entry_obj->list, &s->workload->shadow_bb);
+ ret = audit_bb_end(s, bb->va + start_offset + bb_end_cmd_offset);
+ if (ret)
+ goto err_unmap;
+
+ i915_gem_object_unlock(bb->obj);
+ INIT_LIST_HEAD(&bb->list);
+ list_add(&bb->list, &s->workload->shadow_bb);
+
+ bb->bb_start_cmd_va = s->ip_va;
+
+ if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+ bb->bb_offset = s->ip_va - s->rb_va;
+ else
+ bb->bb_offset = 0;
+
/*
* ip_va saves the virtual address of the shadow batch buffer, while
* ip_gma saves the graphics address of the original batch buffer.
- * As the shadow batch buffer is just a copy from the originial one,
+ * As the shadow batch buffer is just a copy from the original one,
* it should be right to use shadow batch buffer'va and original batch
* buffer's gma in pair. After all, we don't want to pin the shadow
* buffer here (too early).
*/
- s->ip_va = dst;
+ s->ip_va = bb->va + start_offset;
s->ip_gma = gma;
-
return 0;
-
-unmap_src:
- i915_gem_object_unpin_map(entry_obj->obj);
-put_obj:
- i915_gem_object_put(entry_obj->obj);
-free_entry:
- kfree(entry_obj);
+err_unmap:
+ i915_gem_object_unpin_map(bb->obj);
+err_free_obj:
+ i915_gem_object_put(bb->obj);
+err_free_bb:
+ kfree(bb);
return ret;
}
@@ -1708,13 +2006,13 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
- return -EINVAL;
+ return -EFAULT;
}
second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
- return -EINVAL;
+ return -EFAULT;
}
s->saved_buf_addr_type = s->buf_addr_type;
@@ -1738,11 +2036,12 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
if (ret < 0)
return ret;
}
-
return ret;
}
-static struct cmd_info cmd_info[] = {
+static int mi_noop_index;
+
+static const struct cmd_info cmd_info[] = {
{"MI_NOOP", OP_MI_NOOP, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL},
{"MI_SET_PREDICATE", OP_MI_SET_PREDICATE, F_LEN_CONST, R_ALL, D_ALL,
@@ -1790,21 +2089,24 @@ static struct cmd_info cmd_info[] = {
{"MI_RS_CONTEXT", OP_MI_RS_CONTEXT, F_LEN_CONST, R_RCS, D_ALL, 0, 1,
NULL},
- {"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR | F_POST_HANDLE,
+ {"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR,
R_RCS | R_BCS, D_ALL, 0, 8, cmd_handler_mi_display_flip},
- {"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR, R_ALL, D_ALL,
- 0, 8, NULL},
+ {"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR | F_LEN_VAR_FIXED,
+ R_ALL, D_ALL, 0, 8, NULL, CMD_LEN(1)},
{"MI_MATH", OP_MI_MATH, F_LEN_VAR, R_ALL, D_ALL, 0, 8, NULL},
- {"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+ {"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS,
+ D_ALL, 0, 8, NULL, CMD_LEN(0)},
- {"ME_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, F_LEN_VAR, R_ALL,
- D_BDW_PLUS, 0, 8, NULL},
+ {"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, 0, 8,
+ NULL, CMD_LEN(0)},
- {"ME_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, F_LEN_VAR, R_ALL, D_BDW_PLUS,
- ADDR_FIX_1(2), 8, cmd_handler_mi_semaphore_wait},
+ {"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, ADDR_FIX_1(2),
+ 8, cmd_handler_mi_semaphore_wait, CMD_LEN(2)},
{"MI_STORE_DATA_IMM", OP_MI_STORE_DATA_IMM, F_LEN_VAR, R_ALL, D_BDW_PLUS,
ADDR_FIX_1(1), 10, cmd_handler_mi_store_data_imm},
@@ -1818,8 +2120,9 @@ static struct cmd_info cmd_info[] = {
{"MI_UPDATE_GTT", OP_MI_UPDATE_GTT, F_LEN_VAR, R_ALL, D_BDW_PLUS, 0, 10,
cmd_handler_mi_update_gtt},
- {"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM, F_LEN_VAR, R_ALL,
- D_ALL, ADDR_FIX_1(2), 8, cmd_handler_srm},
+ {"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+ cmd_handler_srm, CMD_LEN(2)},
{"MI_FLUSH_DW", OP_MI_FLUSH_DW, F_LEN_VAR, R_ALL, D_ALL, 0, 6,
cmd_handler_mi_flush_dw},
@@ -1827,26 +2130,30 @@ static struct cmd_info cmd_info[] = {
{"MI_CLFLUSH", OP_MI_CLFLUSH, F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(1),
10, cmd_handler_mi_clflush},
- {"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT, F_LEN_VAR, R_ALL,
- D_ALL, ADDR_FIX_1(1), 6, cmd_handler_mi_report_perf_count},
+ {"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(1), 6,
+ cmd_handler_mi_report_perf_count, CMD_LEN(2)},
- {"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM, F_LEN_VAR, R_ALL,
- D_ALL, ADDR_FIX_1(2), 8, cmd_handler_lrm},
+ {"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+ cmd_handler_lrm, CMD_LEN(2)},
- {"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG, F_LEN_VAR, R_ALL,
- D_ALL, 0, 8, cmd_handler_lrr},
+ {"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, 0, 8,
+ cmd_handler_lrr, CMD_LEN(1)},
- {"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM, F_LEN_VAR, R_RCS,
- D_ALL, 0, 8, NULL},
+ {"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM,
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, D_ALL, 0,
+ 8, NULL, CMD_LEN(2)},
- {"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR, R_RCS, D_ALL,
- ADDR_FIX_1(2), 8, NULL},
+ {"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR | F_LEN_VAR_FIXED,
+ R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL, CMD_LEN(2)},
{"MI_STORE_URM_MEM", OP_MI_STORE_URM_MEM, F_LEN_VAR, R_RCS, D_ALL,
ADDR_FIX_1(2), 8, NULL},
- {"MI_OP_2E", OP_MI_2E, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_2(1, 2),
- 8, cmd_handler_mi_op_2e},
+ {"MI_OP_2E", OP_MI_2E, F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS,
+ ADDR_FIX_2(1, 2), 8, cmd_handler_mi_op_2e, CMD_LEN(3)},
{"MI_OP_2F", OP_MI_2F, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1),
8, cmd_handler_mi_op_2f},
@@ -1856,8 +2163,8 @@ static struct cmd_info cmd_info[] = {
cmd_handler_mi_batch_buffer_start},
{"MI_CONDITIONAL_BATCH_BUFFER_END", OP_MI_CONDITIONAL_BATCH_BUFFER_END,
- F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
- cmd_handler_mi_conditional_batch_buffer_end},
+ F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+ cmd_handler_mi_conditional_batch_buffer_end, CMD_LEN(2)},
{"MI_LOAD_SCAN_LINES_INCL", OP_MI_LOAD_SCAN_LINES_INCL, F_LEN_CONST,
R_RCS | R_BCS, D_ALL, 0, 2, NULL},
@@ -2245,6 +2552,9 @@ static struct cmd_info cmd_info[] = {
{"OP_3D_MEDIA_0_1_4", OP_3D_MEDIA_0_1_4, F_LEN_VAR, R_RCS, D_ALL,
ADDR_FIX_1(1), 8, NULL},
+ {"OP_SWTESS_BASE_ADDRESS", OP_SWTESS_BASE_ADDRESS,
+ F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_2(1, 2), 3, NULL},
+
{"3DSTATE_VS", OP_3DSTATE_VS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
{"3DSTATE_SF", OP_3DSTATE_SF, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
@@ -2264,6 +2574,9 @@ static struct cmd_info cmd_info[] = {
{"MEDIA_STATE_FLUSH", OP_MEDIA_STATE_FLUSH, F_LEN_VAR, R_RCS, D_ALL,
0, 16, NULL},
+ {"MEDIA_POOL_STATE", OP_MEDIA_POOL_STATE, F_LEN_VAR, R_RCS, D_ALL,
+ 0, 16, NULL},
+
{"MEDIA_OBJECT", OP_MEDIA_OBJECT, F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL},
{"MEDIA_CURBE_LOAD", OP_MEDIA_CURBE_LOAD, F_LEN_VAR, R_RCS, D_ALL,
@@ -2406,7 +2719,7 @@ static struct cmd_info cmd_info[] = {
0, 12, NULL},
{"VEB_DI_IECP", OP_VEB_DNDI_IECP_STATE, F_LEN_VAR, R_VECS, D_BDW_PLUS,
- 0, 20, NULL},
+ 0, 12, NULL},
};
static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
@@ -2418,23 +2731,38 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
static int cmd_parser_exec(struct parser_exec_state *s)
{
struct intel_vgpu *vgpu = s->vgpu;
- struct cmd_info *info;
+ const struct cmd_info *info;
u32 cmd;
int ret = 0;
cmd = cmd_val(s, 0);
- info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
+ /* fastpath for MI_NOOP */
+ if (cmd == MI_NOOP)
+ info = &cmd_info[mi_noop_index];
+ else
+ info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+
if (info == NULL) {
- gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
- cmd, get_opcode(cmd, s->ring_id));
- return -EINVAL;
+ gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+ cmd, get_opcode(cmd, s->engine),
+ repr_addr_type(s->buf_addr_type),
+ s->engine->name, s->workload);
+ return -EBADRQC;
}
s->info = info;
- trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
- cmd_length(s), s->buf_type);
+ trace_gvt_command(vgpu->id, s->engine->id, s->ip_gma, s->ip_va,
+ cmd_length(s), s->buf_type, s->buf_addr_type,
+ s->workload, info->name);
+
+ if ((info->flag & F_LEN_MASK) == F_LEN_VAR_FIXED) {
+ ret = gvt_check_valid_cmd_length(cmd_length(s),
+ info->valid_len);
+ if (ret)
+ return ret;
+ }
if (info->handler) {
ret = info->handler(s);
@@ -2463,6 +2791,10 @@ static inline bool gma_out_of_range(unsigned long gma,
return (gma > gma_tail) && (gma < gma_head);
}
+/* Keep the consistent return type, e.g EBADRQC for unknown
+ * cmd, EFAULT for invalid address, EPERM for nonpriv. later
+ * works as the input of VM healthy status.
+ */
static int command_scan(struct parser_exec_state *s,
unsigned long rb_head, unsigned long rb_tail,
unsigned long rb_start, unsigned long rb_len)
@@ -2477,7 +2809,8 @@ static int command_scan(struct parser_exec_state *s,
gma_bottom = rb_start + rb_len;
while (s->ip_gma != gma_tail) {
- if (s->buf_type == RING_BUFFER_INSTRUCTION) {
+ if (s->buf_type == RING_BUFFER_INSTRUCTION ||
+ s->buf_type == RING_BUFFER_CTX) {
if (!(s->ip_gma >= rb_start) ||
!(s->ip_gma < gma_bottom)) {
gvt_vgpu_err("ip_gma %lx out of ring scope."
@@ -2485,7 +2818,7 @@ static int command_scan(struct parser_exec_state *s,
s->ip_gma, rb_start,
gma_bottom);
parser_exec_state_dump(s);
- return -EINVAL;
+ return -EFAULT;
}
if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
gvt_vgpu_err("ip_gma %lx out of range."
@@ -2509,38 +2842,32 @@ static int command_scan(struct parser_exec_state *s,
static int scan_workload(struct intel_vgpu_workload *workload)
{
- unsigned long gma_head, gma_tail, gma_bottom;
+ unsigned long gma_head, gma_tail;
struct parser_exec_state s;
int ret = 0;
/* ring base is page aligned */
- if (WARN_ON(!IS_ALIGNED(workload->rb_start, GTT_PAGE_SIZE)))
+ if (WARN_ON(!IS_ALIGNED(workload->rb_start, I915_GTT_PAGE_SIZE)))
return -EINVAL;
gma_head = workload->rb_start + workload->rb_head;
gma_tail = workload->rb_start + workload->rb_tail;
- gma_bottom = workload->rb_start + _RING_CTL_BUF_SIZE(workload->rb_ctl);
s.buf_type = RING_BUFFER_INSTRUCTION;
s.buf_addr_type = GTT_BUFFER;
s.vgpu = workload->vgpu;
- s.ring_id = workload->ring_id;
+ s.engine = workload->engine;
s.ring_start = workload->rb_start;
s.ring_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
s.ring_head = gma_head;
s.ring_tail = gma_tail;
s.rb_va = workload->shadow_ring_buffer_va;
s.workload = workload;
+ s.is_ctx_wa = false;
- if ((bypass_scan_mask & (1 << workload->ring_id)) ||
- gma_head == gma_tail)
+ if (bypass_scan_mask & workload->engine->mask || gma_head == gma_tail)
return 0;
- if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
- ret = -EINVAL;
- goto out;
- }
-
ret = ip_gma_set(&s, gma_head);
if (ret)
goto out;
@@ -2555,7 +2882,7 @@ out:
static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
- unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail;
+ unsigned long gma_head, gma_tail, ring_size, ring_tail;
struct parser_exec_state s;
int ret = 0;
struct intel_vgpu_workload *workload = container_of(wa_ctx,
@@ -2563,31 +2890,27 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
wa_ctx);
/* ring base is page aligned */
- if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE)))
+ if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma,
+ I915_GTT_PAGE_SIZE)))
return -EINVAL;
- ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t);
+ ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(u32);
ring_size = round_up(wa_ctx->indirect_ctx.size + CACHELINE_BYTES,
PAGE_SIZE);
gma_head = wa_ctx->indirect_ctx.guest_gma;
gma_tail = wa_ctx->indirect_ctx.guest_gma + ring_tail;
- gma_bottom = wa_ctx->indirect_ctx.guest_gma + ring_size;
s.buf_type = RING_BUFFER_INSTRUCTION;
s.buf_addr_type = GTT_BUFFER;
s.vgpu = workload->vgpu;
- s.ring_id = workload->ring_id;
+ s.engine = workload->engine;
s.ring_start = wa_ctx->indirect_ctx.guest_gma;
s.ring_size = ring_size;
s.ring_head = gma_head;
s.ring_tail = gma_tail;
s.rb_va = wa_ctx->indirect_ctx.shadow_va;
s.workload = workload;
-
- if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
- ret = -EINVAL;
- goto out;
- }
+ s.is_ctx_wa = true;
ret = ip_gma_set(&s, gma_head);
if (ret)
@@ -2602,8 +2925,9 @@ out:
static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
+ struct intel_vgpu_submission *s = &vgpu->submission;
unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
- u32 *cs;
+ void *shadow_ring_buffer_va;
int ret;
guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2616,38 +2940,48 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
gma_tail = workload->rb_start + workload->rb_tail;
gma_top = workload->rb_start + guest_rb_size;
- /* allocate shadow ring buffer */
- cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
- if (IS_ERR(cs))
- return PTR_ERR(cs);
+ if (workload->rb_len > s->ring_scan_buffer_size[workload->engine->id]) {
+ void *p;
+
+ /* realloc the new ring buffer if needed */
+ p = krealloc(s->ring_scan_buffer[workload->engine->id],
+ workload->rb_len, GFP_KERNEL);
+ if (!p) {
+ gvt_vgpu_err("fail to re-alloc ring scan buffer\n");
+ return -ENOMEM;
+ }
+ s->ring_scan_buffer[workload->engine->id] = p;
+ s->ring_scan_buffer_size[workload->engine->id] = workload->rb_len;
+ }
+
+ shadow_ring_buffer_va = s->ring_scan_buffer[workload->engine->id];
/* get shadow ring buffer va */
- workload->shadow_ring_buffer_va = cs;
+ workload->shadow_ring_buffer_va = shadow_ring_buffer_va;
/* head > tail --> copy head <-> top */
if (gma_head > gma_tail) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
- gma_head, gma_top, cs);
+ gma_head, gma_top, shadow_ring_buffer_va);
if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n");
return ret;
}
- cs += ret / sizeof(u32);
+ shadow_ring_buffer_va += ret;
gma_head = workload->rb_start;
}
/* copy head or start <-> tail */
- ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
+ ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
+ shadow_ring_buffer_va);
if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n");
return ret;
}
- cs += ret / sizeof(u32);
- intel_ring_advance(workload->req, cs);
return 0;
}
-int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
+int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload)
{
int ret;
struct intel_vgpu *vgpu = workload->vgpu;
@@ -2678,9 +3012,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
int ret = 0;
void *map;
- obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
- roundup(ctx_size + CACHELINE_BYTES,
- PAGE_SIZE));
+ obj = i915_gem_object_create_shmem(workload->engine->i915,
+ roundup(ctx_size + CACHELINE_BYTES,
+ PAGE_SIZE));
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -2692,7 +3026,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
goto put_obj;
}
+ i915_gem_object_lock(obj, NULL);
ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ i915_gem_object_unlock(obj);
if (ret) {
gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
goto unmap_src;
@@ -2714,15 +3050,18 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
unmap_src:
i915_gem_object_unpin_map(obj);
put_obj:
- i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+ i915_gem_object_put(obj);
return ret;
}
static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
- uint32_t per_ctx_start[CACHELINE_DWORDS] = {0};
+ u32 per_ctx_start[CACHELINE_DWORDS] = {};
unsigned char *bb_start_sva;
+ if (!wa_ctx->per_ctx.valid)
+ return 0;
+
per_ctx_start[0] = 0x18800001;
per_ctx_start[1] = wa_ctx->per_ctx.guest_gma;
@@ -2762,30 +3101,126 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
return 0;
}
-static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
- unsigned int opcode, int rings)
+/* generate dummy contexts by sending empty requests to HW, and let
+ * the HW to fill Engine Contexts. This dummy contexts are used for
+ * initialization purpose (update reg whitelist), so referred to as
+ * init context here
+ */
+void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
{
- struct cmd_info *info = NULL;
- unsigned int ring;
+ const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
+ struct intel_gvt *gvt = vgpu->gvt;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
- for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
- info = find_cmd_entry(gvt, opcode, ring);
- if (info)
- break;
+ if (gvt->is_reg_whitelist_updated)
+ return;
+
+ /* scan init ctx to update cmd accessible list */
+ for_each_engine(engine, gvt->gt, id) {
+ struct parser_exec_state s;
+ void *vaddr;
+ int ret;
+
+ if (!engine->default_state)
+ continue;
+
+ vaddr = shmem_pin_map(engine->default_state);
+ if (!vaddr) {
+ gvt_err("failed to map %s->default state\n",
+ engine->name);
+ return;
+ }
+
+ s.buf_type = RING_BUFFER_CTX;
+ s.buf_addr_type = GTT_BUFFER;
+ s.vgpu = vgpu;
+ s.engine = engine;
+ s.ring_start = 0;
+ s.ring_size = engine->context_size - start;
+ s.ring_head = 0;
+ s.ring_tail = s.ring_size;
+ s.rb_va = vaddr + start;
+ s.workload = NULL;
+ s.is_ctx_wa = false;
+ s.is_init_ctx = true;
+
+ /* skipping the first RING_CTX_SIZE(0x50) dwords */
+ ret = ip_gma_set(&s, RING_CTX_SIZE);
+ if (ret == 0) {
+ ret = command_scan(&s, 0, s.ring_size, 0, s.ring_size);
+ if (ret)
+ gvt_err("Scan init ctx error\n");
+ }
+
+ shmem_unpin_map(engine->default_state, vaddr);
+ if (ret)
+ return;
}
- return info;
+
+ gvt->is_reg_whitelist_updated = true;
+}
+
+int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu *vgpu = workload->vgpu;
+ unsigned long gma_head, gma_tail, gma_start, ctx_size;
+ struct parser_exec_state s;
+ int ring_id = workload->engine->id;
+ struct intel_context *ce = vgpu->submission.shadow[ring_id];
+ int ret;
+
+ GEM_BUG_ON(atomic_read(&ce->pin_count) < 0);
+
+ ctx_size = workload->engine->context_size - PAGE_SIZE;
+
+ /* Only ring contxt is loaded to HW for inhibit context, no need to
+ * scan engine context
+ */
+ if (is_inhibit_context(ce))
+ return 0;
+
+ gma_start = i915_ggtt_offset(ce->state) + LRC_STATE_PN*PAGE_SIZE;
+ gma_head = 0;
+ gma_tail = ctx_size;
+
+ s.buf_type = RING_BUFFER_CTX;
+ s.buf_addr_type = GTT_BUFFER;
+ s.vgpu = workload->vgpu;
+ s.engine = workload->engine;
+ s.ring_start = gma_start;
+ s.ring_size = ctx_size;
+ s.ring_head = gma_start + gma_head;
+ s.ring_tail = gma_start + gma_tail;
+ s.rb_va = ce->lrc_reg_state;
+ s.workload = workload;
+ s.is_ctx_wa = false;
+ s.is_init_ctx = false;
+
+ /* don't scan the first RING_CTX_SIZE(0x50) dwords, as it's ring
+ * context
+ */
+ ret = ip_gma_set(&s, gma_start + gma_head + RING_CTX_SIZE);
+ if (ret)
+ goto out;
+
+ ret = command_scan(&s, gma_head, gma_tail,
+ gma_start, ctx_size);
+out:
+ if (ret)
+ gvt_vgpu_err("scan shadow ctx error\n");
+
+ return ret;
}
static int init_cmd_table(struct intel_gvt *gvt)
{
+ unsigned int gen_type = intel_gvt_get_device_type(gvt);
int i;
- struct cmd_entry *e;
- struct cmd_info *info;
- unsigned int gen_type;
-
- gen_type = intel_gvt_get_device_type(gvt);
for (i = 0; i < ARRAY_SIZE(cmd_info); i++) {
+ struct cmd_entry *e;
+
if (!(cmd_info[i].devices & gen_type))
continue;
@@ -2794,20 +3229,16 @@ static int init_cmd_table(struct intel_gvt *gvt)
return -ENOMEM;
e->info = &cmd_info[i];
- info = find_cmd_entry_any_ring(gvt,
- e->info->opcode, e->info->rings);
- if (info) {
- gvt_err("%s %s duplicated\n", e->info->name,
- info->name);
- return -EEXIST;
- }
+ if (cmd_info[i].opcode == OP_MI_NOOP)
+ mi_noop_index = i;
INIT_HLIST_NODE(&e->hlist);
add_cmd_entry(gvt, e);
gvt_dbg_cmd("add %-30s op %04x flag %x devs %02x rings %02x\n",
- e->info->name, e->info->opcode, e->info->flag,
- e->info->devices, e->info->rings);
+ e->info->name, e->info->opcode, e->info->flag,
+ e->info->devices, e->info->rings);
}
+
return 0;
}