diff options
Diffstat (limited to 'arch/x86/kvm/emulate.c')
| -rw-r--r-- | arch/x86/kvm/emulate.c | 1199 |
1 files changed, 667 insertions, 532 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 936a397a08cd..c8e292e9a24d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -26,6 +26,7 @@ #include <asm/debugreg.h> #include <asm/nospec-branch.h> #include <asm/ibt.h> +#include <asm/text-patching.h> #include "x86.h" #include "tss.h" @@ -80,9 +81,8 @@ */ /* Operand sizes: 8-bit operands or specified/overridden size. */ -#define ByteOp (1<<0) /* 8-bit operands. */ -/* Destination operand type. */ -#define DstShift 1 +#define ByteOp (1<<0) /* 8-bit operands. */ +#define DstShift 1 /* Destination operand type at bits 1-5 */ #define ImplicitOps (OpImplicit << DstShift) #define DstReg (OpReg << DstShift) #define DstMem (OpMem << DstShift) @@ -94,8 +94,7 @@ #define DstDX (OpDX << DstShift) #define DstAccLo (OpAccLo << DstShift) #define DstMask (OpMask << DstShift) -/* Source operand type. */ -#define SrcShift 6 +#define SrcShift 6 /* Source operand type at bits 6-10 */ #define SrcNone (OpNone << SrcShift) #define SrcReg (OpReg << SrcShift) #define SrcMem (OpMem << SrcShift) @@ -118,10 +117,10 @@ #define SrcAccHi (OpAccHi << SrcShift) #define SrcMask (OpMask << SrcShift) #define BitOp (1<<11) -#define MemAbs (1<<12) /* Memory operand is absolute displacement */ +#define MemAbs (1<<12) /* Memory operand is absolute displacement */ #define String (1<<13) /* String instruction (rep capable) */ #define Stack (1<<14) /* Stack instruction (push/pop) */ -#define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */ +#define GroupMask (7<<15) /* Group mechanisms, at bits 15-17 */ #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ @@ -130,11 +129,8 @@ #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ #define ModeDual (7<<15) /* Different instruction for 32/64 bit */ #define Sse (1<<18) /* SSE Vector instruction */ -/* Generic ModRM decode. */ -#define ModRM (1<<19) -/* Destination is only written; never read. */ -#define Mov (1<<20) -/* Misc flags */ +#define ModRM (1<<19) /* Generic ModRM decode. */ +#define Mov (1<<20) /* Destination is only written; never read. */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ @@ -142,11 +138,11 @@ #define Undefined (1<<25) /* No Such Instruction */ #define Lock (1<<26) /* lock prefix is allowed for the instruction */ #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ -#define No64 (1<<28) +#define No64 (1<<28) /* Instruction generates #UD in 64-bit mode */ #define PageTable (1 << 29) /* instruction used to write page table */ #define NotImpl (1 << 30) /* instruction is not implemented */ -/* Source 2 operand type */ -#define Src2Shift (31) +#define Avx ((u64)1 << 31) /* Instruction uses VEX prefix */ +#define Src2Shift (32) /* Source 2 operand type at bits 32-36 */ #define Src2None (OpNone << Src2Shift) #define Src2Mem (OpMem << Src2Shift) #define Src2CL (OpCL << Src2Shift) @@ -160,13 +156,13 @@ #define Src2FS (OpFS << Src2Shift) #define Src2GS (OpGS << Src2Shift) #define Src2Mask (OpMask << Src2Shift) +/* free: 37-39 */ #define Mmx ((u64)1 << 40) /* MMX Vector instruction */ -#define AlignMask ((u64)7 << 41) +#define AlignMask ((u64)3 << 41) /* Memory alignment requirement at bits 41-42 */ #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */ -#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */ -#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ -#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ +#define Aligned16 ((u64)3 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ +/* free: 43-44 */ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ @@ -178,6 +174,7 @@ #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */ +#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -202,7 +199,6 @@ struct opcode { const struct escape *esc; const struct instr_dual *idual; const struct mode_dual *mdual; - void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -243,6 +239,13 @@ enum x86_transfer_type { X86_TRANSFER_TASK_SWITCH, }; +enum rex_bits { + REX_B = 1, + REX_X = 2, + REX_R = 4, + REX_W = 8, +}; + static void writeback_registers(struct x86_emulate_ctxt *ctxt) { unsigned long dirty = ctxt->regs_dirty; @@ -266,186 +269,130 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 -#define ON64(x) x +#define ON64(x...) x #else -#define ON64(x) +#define ON64(x...) #endif -/* - * fastop functions have a special calling convention: - * - * dst: rax (in/out) - * src: rdx (in/out) - * src2: rcx (in) - * flags: rflags (in/out) - * ex: rsi (in:fastop pointer, out:zero if exception) - * - * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for - * different operand sizes can be reached by calculation, rather than a jump - * table (which would be bigger than the code). - * - * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR - * and 1 for the straight line speculation INT3, leaves 7 bytes for the - * body of the function. Currently none is larger than 4. - */ -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); - -#define FASTOP_SIZE 16 - -#define __FOP_FUNC(name) \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ - ".type " name ", @function \n\t" \ - name ":\n\t" \ - ASM_ENDBR \ - IBT_NOSEAL(name) - -#define FOP_FUNC(name) \ - __FOP_FUNC(#name) - -#define __FOP_RET(name) \ - "11: " ASM_RET \ - ".size " name ", .-" name "\n\t" - -#define FOP_RET(name) \ - __FOP_RET(#name) - -#define __FOP_START(op, align) \ - extern void em_##op(struct fastop *fake); \ - asm(".pushsection .text, \"ax\" \n\t" \ - ".global em_" #op " \n\t" \ - ".align " __stringify(align) " \n\t" \ - "em_" #op ":\n\t" - -#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) - -#define FOP_END \ - ".popsection") - -#define __FOPNOP(name) \ - __FOP_FUNC(name) \ - __FOP_RET(name) - -#define FOPNOP() \ - __FOPNOP(__stringify(__UNIQUE_ID(nop))) - -#define FOP1E(op, dst) \ - __FOP_FUNC(#op "_" #dst) \ - "10: " #op " %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst) - -#define FOP1EEX(op, dst) \ - FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi) - -#define FASTOP1(op) \ - FOP_START(op) \ - FOP1E(op##b, al) \ - FOP1E(op##w, ax) \ - FOP1E(op##l, eax) \ - ON64(FOP1E(op##q, rax)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m) */ -#define FASTOP1SRC2(op, name) \ - FOP_START(name) \ - FOP1E(op, cl) \ - FOP1E(op, cx) \ - FOP1E(op, ecx) \ - ON64(FOP1E(op, rcx)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ -#define FASTOP1SRC2EX(op, name) \ - FOP_START(name) \ - FOP1EEX(op, cl) \ - FOP1EEX(op, cx) \ - FOP1EEX(op, ecx) \ - ON64(FOP1EEX(op, rcx)) \ - FOP_END - -#define FOP2E(op, dst, src) \ - __FOP_FUNC(#op "_" #dst "_" #src) \ - #op " %" #src ", %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst "_" #src) - -#define FASTOP2(op) \ - FOP_START(op) \ - FOP2E(op##b, al, dl) \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, word only */ -#define FASTOP2W(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, src is CL */ -#define FASTOP2CL(op) \ - FOP_START(op) \ - FOP2E(op##b, al, cl) \ - FOP2E(op##w, ax, cl) \ - FOP2E(op##l, eax, cl) \ - ON64(FOP2E(op##q, rax, cl)) \ - FOP_END - -/* 2 operand, src and dest are reversed */ -#define FASTOP2R(op, name) \ - FOP_START(name) \ - FOP2E(op##b, dl, al) \ - FOP2E(op##w, dx, ax) \ - FOP2E(op##l, edx, eax) \ - ON64(FOP2E(op##q, rdx, rax)) \ - FOP_END - -#define FOP3E(op, dst, src, src2) \ - __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ - #op " %" #src2 ", %" #src ", %" #dst " \n\t"\ - __FOP_RET(#op "_" #dst "_" #src "_" #src2) - -/* 3-operand, word-only, src2=cl */ -#define FASTOP3WCL(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP3E(op##w, ax, dx, cl) \ - FOP3E(op##l, eax, edx, cl) \ - ON64(FOP3E(op##q, rax, rdx, cl)) \ - FOP_END - -/* Special case for SETcc - 1 instruction per cc */ -#define FOP_SETCC(op) \ - FOP_FUNC(op) \ - #op " %al \n\t" \ - FOP_RET(op) - -FOP_START(setcc) -FOP_SETCC(seto) -FOP_SETCC(setno) -FOP_SETCC(setc) -FOP_SETCC(setnc) -FOP_SETCC(setz) -FOP_SETCC(setnz) -FOP_SETCC(setbe) -FOP_SETCC(setnbe) -FOP_SETCC(sets) -FOP_SETCC(setns) -FOP_SETCC(setp) -FOP_SETCC(setnp) -FOP_SETCC(setl) -FOP_SETCC(setnl) -FOP_SETCC(setle) -FOP_SETCC(setnle) -FOP_END; - -FOP_START(salc) -FOP_FUNC(salc) -"pushf; sbb %al, %al; popf \n\t" -FOP_RET(salc) -FOP_END; +#define EM_ASM_START(op) \ +static int em_##op(struct x86_emulate_ctxt *ctxt) \ +{ \ + unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \ + int bytes = 1, ok = 1; \ + if (!(ctxt->d & ByteOp)) \ + bytes = ctxt->dst.bytes; \ + switch (bytes) { + +#define __EM_ASM(str) \ + asm("push %[flags]; popf \n\t" \ + "10: " str \ + "pushf; pop %[flags] \n\t" \ + "11: \n\t" \ + : "+a" (ctxt->dst.val), \ + "+d" (ctxt->src.val), \ + [flags] "+D" (flags), \ + "+S" (ok) \ + : "c" (ctxt->src2.val)) + +#define __EM_ASM_1(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t") + +#define __EM_ASM_1_EX(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t" \ + _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi)) + +#define __EM_ASM_2(op, dst, src) \ + __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") + +#define __EM_ASM_3(op, dst, src, src2) \ + __EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t") + +#define EM_ASM_END \ + } \ + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \ + return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE; \ +} + +/* 1-operand, using "a" (dst) */ +#define EM_ASM_1(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_1(op##b, al); break; \ + case 2: __EM_ASM_1(op##w, ax); break; \ + case 4: __EM_ASM_1(op##l, eax); break; \ + ON64(case 8: __EM_ASM_1(op##q, rax); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) */ +#define EM_ASM_1SRC2(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1(op##b, cl); break; \ + case 2: __EM_ASM_1(op##w, cx); break; \ + case 4: __EM_ASM_1(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) with exception */ +#define EM_ASM_1SRC2EX(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1_EX(op##b, cl); break; \ + case 2: __EM_ASM_1_EX(op##w, cx); break; \ + case 4: __EM_ASM_1_EX(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst), "d" (src) */ +#define EM_ASM_2(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, dl); break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, reversed */ +#define EM_ASM_2R(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_2(op##b, dl, al); break; \ + case 2: __EM_ASM_2(op##w, dx, ax); break; \ + case 4: __EM_ASM_2(op##l, edx, eax); break; \ + ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \ + EM_ASM_END + +/* 2-operand, word only (no byte op) */ +#define EM_ASM_2W(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst) and CL (src2) */ +#define EM_ASM_2CL(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, cl); break; \ + case 2: __EM_ASM_2(op##w, ax, cl); break; \ + case 4: __EM_ASM_2(op##l, eax, cl); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \ + EM_ASM_END + +/* 3-operand, using "a" (dst), "d" (src) and CL (src2) */ +#define EM_ASM_3WCL(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \ + case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \ + ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \ + EM_ASM_END + +static int em_salc(struct x86_emulate_ctxt *ctxt) +{ + /* + * Set AL 0xFF if CF is set, or 0x00 when clear. + */ + ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF); + return X86EMUL_CONTINUE; +} /* * XXX: inoutclob user must know where the argument is being expanded. @@ -477,8 +424,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, .dst_val = ctxt->dst.val64, .src_bytes = ctxt->src.bytes, .dst_bytes = ctxt->dst.bytes, + .src_type = ctxt->src.type, + .dst_type = ctxt->dst.type, .ad_bytes = ctxt->ad_bytes, - .next_rip = ctxt->eip, + .rip = ctxt->eip, + .next_rip = ctxt->_eip, }; return ctxt->ops->intercept(ctxt, &info, stage); @@ -651,9 +601,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) } static inline bool emul_is_noncanonical_address(u64 la, - struct x86_emulate_ctxt *ctxt) + struct x86_emulate_ctxt *ctxt, + unsigned int flags) { - return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt)); + return !ctxt->ops->is_canonical_addr(ctxt, la, flags); } /* @@ -674,7 +625,6 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size) switch (alignment) { case Unaligned: - case Avx: return 1; case Aligned16: return 16; @@ -687,8 +637,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size) static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, unsigned *max_size, unsigned size, - bool write, bool fetch, - enum x86emul_mode mode, ulong *linear) + enum x86emul_mode mode, ulong *linear, + unsigned int flags) { struct desc_struct desc; bool usable; @@ -701,7 +651,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: - *linear = la; + *linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags); va_bits = ctxt_virt_addr_bits(ctxt); if (!__is_canonical_address(la, va_bits)) goto bad; @@ -717,11 +667,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, if (!usable) goto bad; /* code segment in protected mode or read-only data segment */ - if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) - || !(desc.type & 2)) && write) + if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) && + (flags & X86EMUL_F_WRITE)) goto bad; /* unreadable code segment */ - if (!fetch && (desc.type & 8) && !(desc.type & 2)) + if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); if (!(desc.type & 8) && (desc.type & 4)) { @@ -757,8 +707,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { unsigned max_size; - return __linearize(ctxt, addr, &max_size, size, write, false, - ctxt->mode, linear); + return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear, + write ? X86EMUL_F_WRITE : 0); } static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) @@ -771,7 +721,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) if (ctxt->op_bytes != sizeof(unsigned long)) addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); - rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear); + rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear, + X86EMUL_F_FETCH); if (rc == X86EMUL_CONTINUE) ctxt->_eip = addr.ea; return rc; @@ -907,8 +858,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) * boundary check itself. Instead, we use max_size to check * against op_size. */ - rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode, - &linear); + rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear, + X86EMUL_F_FETCH); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; @@ -975,7 +926,7 @@ static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, int byteop) { void *p; - int highbyte_regs = (ctxt->rex_prefix == 0) && byteop; + int highbyte_regs = (ctxt->rex_prefix == REX_NONE) && byteop; if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; @@ -1001,56 +952,55 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } -FASTOP2(add); -FASTOP2(or); -FASTOP2(adc); -FASTOP2(sbb); -FASTOP2(and); -FASTOP2(sub); -FASTOP2(xor); -FASTOP2(cmp); -FASTOP2(test); - -FASTOP1SRC2(mul, mul_ex); -FASTOP1SRC2(imul, imul_ex); -FASTOP1SRC2EX(div, div_ex); -FASTOP1SRC2EX(idiv, idiv_ex); - -FASTOP3WCL(shld); -FASTOP3WCL(shrd); - -FASTOP2W(imul); - -FASTOP1(not); -FASTOP1(neg); -FASTOP1(inc); -FASTOP1(dec); - -FASTOP2CL(rol); -FASTOP2CL(ror); -FASTOP2CL(rcl); -FASTOP2CL(rcr); -FASTOP2CL(shl); -FASTOP2CL(shr); -FASTOP2CL(sar); - -FASTOP2W(bsf); -FASTOP2W(bsr); -FASTOP2W(bt); -FASTOP2W(bts); -FASTOP2W(btr); -FASTOP2W(btc); - -FASTOP2(xadd); - -FASTOP2R(cmp, cmp_r); +EM_ASM_2(add); +EM_ASM_2(or); +EM_ASM_2(adc); +EM_ASM_2(sbb); +EM_ASM_2(and); +EM_ASM_2(sub); +EM_ASM_2(xor); +EM_ASM_2(cmp); +EM_ASM_2(test); +EM_ASM_2(xadd); + +EM_ASM_1SRC2(mul, mul_ex); +EM_ASM_1SRC2(imul, imul_ex); +EM_ASM_1SRC2EX(div, div_ex); +EM_ASM_1SRC2EX(idiv, idiv_ex); + +EM_ASM_3WCL(shld); +EM_ASM_3WCL(shrd); + +EM_ASM_2W(imul); + +EM_ASM_1(not); +EM_ASM_1(neg); +EM_ASM_1(inc); +EM_ASM_1(dec); + +EM_ASM_2CL(rol); +EM_ASM_2CL(ror); +EM_ASM_2CL(rcl); +EM_ASM_2CL(rcr); +EM_ASM_2CL(shl); +EM_ASM_2CL(shr); +EM_ASM_2CL(sar); + +EM_ASM_2W(bsf); +EM_ASM_2W(bsr); +EM_ASM_2W(bt); +EM_ASM_2W(bts); +EM_ASM_2W(btr); +EM_ASM_2W(btc); + +EM_ASM_2R(cmp, cmp_r); static int em_bsf_c(struct x86_emulate_ctxt *ctxt) { /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsf); + return em_bsf(ctxt); } static int em_bsr_c(struct x86_emulate_ctxt *ctxt) @@ -1058,18 +1008,12 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsr); + return em_bsr(ctxt); } static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { - u8 rc; - void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); - - flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; " CALL_NOSPEC - : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); - return rc; + return __emulate_cc(flags, condition & 0xf); } static void fetch_register_operand(struct operand *op) @@ -1088,6 +1032,7 @@ static void fetch_register_operand(struct operand *op) op->val = *(u64 *)op->addr.reg; break; } + op->orig_val = op->val; } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1133,17 +1078,17 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static void decode_register_operand(struct x86_emulate_ctxt *ctxt, - struct operand *op) +static void __decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, int reg) { - unsigned int reg; - - if (ctxt->d & ModRM) - reg = ctxt->modrm_reg; - else - reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); - - if (ctxt->d & Sse) { + if ((ctxt->d & Avx) && ctxt->op_bytes == 32) { + op->type = OP_YMM; + op->bytes = 32; + op->addr.xmm = reg; + kvm_read_avx_reg(reg, &op->vec_val2); + return; + } + if (ctxt->d & (Avx|Sse)) { op->type = OP_XMM; op->bytes = 16; op->addr.xmm = reg; @@ -1161,9 +1106,20 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp); - fetch_register_operand(op); - op->orig_val = op->val; +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op) +{ + unsigned int reg; + + if (ctxt->d & ModRM) + reg = ctxt->modrm_reg; + else + reg = (ctxt->b & 7) | (ctxt->rex_bits & REX_B ? 8 : 0); + + __decode_register_operand(ctxt, op, reg); } static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg) @@ -1180,9 +1136,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, int rc = X86EMUL_CONTINUE; ulong modrm_ea = 0; - ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */ - index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */ - base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */ + ctxt->modrm_reg = (ctxt->rex_bits & REX_R ? 8 : 0); + index_reg = (ctxt->rex_bits & REX_X ? 8 : 0); + base_reg = (ctxt->rex_bits & REX_B ? 8 : 0); ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6; ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; @@ -1190,24 +1146,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, ctxt->modrm_seg = VCPU_SREG_DS; if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) { - op->type = OP_REG; - op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; - op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, - ctxt->d & ByteOp); - if (ctxt->d & Sse) { - op->type = OP_XMM; - op->bytes = 16; - op->addr.xmm = ctxt->modrm_rm; - kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val); - return rc; - } - if (ctxt->d & Mmx) { - op->type = OP_MM; - op->bytes = 8; - op->addr.mm = ctxt->modrm_rm & 7; - return rc; - } - fetch_register_operand(op); + __decode_register_operand(ctxt, op, ctxt->modrm_rm); return rc; } @@ -1548,6 +1487,37 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return linear_write_system(ctxt, addr, desc, sizeof(*desc)); } +static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl) +{ + const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET; + u64 efer = 0, cet = 0, ssp = 0; + + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer)) + return true; + + /* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */ + if (!(efer & EFER_LMA)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet)) + return true; + + if (!(cet & CET_SHSTK_EN)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp)) + return true; + + /* + * On transfer from 64-bit mode to compatibility mode, SSP[63:32] must + * be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode. + */ + return ssp >> 32; +} + static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1688,6 +1658,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (efer & EFER_LMA) goto exception; } + if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) { + err_code = 0; + goto exception; + } /* CS(RPL) <- CPL */ selector = (selector & 0xfffc) | cpl; @@ -1732,7 +1706,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32), ctxt)) + ((u64)base3 << 32), ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, err_code); } @@ -1799,15 +1774,21 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) op->addr.mem, &op->val, op->bytes); - break; case OP_MEM_STR: return segmented_write(ctxt, op->addr.mem, op->data, op->bytes * op->count); - break; case OP_XMM: - kvm_write_sse_reg(op->addr.xmm, &op->vec_val); + if (!(ctxt->d & Avx)) { + kvm_write_sse_reg(op->addr.xmm, &op->vec_val); + break; + } + /* full YMM write but with high bytes cleared */ + memset(op->valptr + 16, 0, 16); + fallthrough; + case OP_YMM: + kvm_write_avx_reg(op->addr.xmm, &op->vec_val2); break; case OP_MM: kvm_write_mmx_reg(op->addr.mm, &op->mm_val); @@ -1821,22 +1802,22 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) return X86EMUL_CONTINUE; } -static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) +static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len) { struct segmented_address addr; - rsp_increment(ctxt, -bytes); + rsp_increment(ctxt, -len); addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; - return segmented_write(ctxt, addr, data, bytes); + return segmented_write(ctxt, addr, data, len); } static int em_push(struct x86_emulate_ctxt *ctxt) { /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return push(ctxt, &ctxt->src.val, ctxt->op_bytes); + return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, @@ -1864,7 +1845,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, void *dest, int len) { int rc; - unsigned long val, change_mask; + unsigned long val = 0; + unsigned long change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; int cpl = ctxt->ops->cpl(ctxt); @@ -1921,7 +1903,7 @@ static int em_enter(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; rbp = reg_read(ctxt, VCPU_REGS_RBP); - rc = push(ctxt, &rbp, stack_size(ctxt)); + rc = emulate_push(ctxt, &rbp, stack_size(ctxt)); if (rc != X86EMUL_CONTINUE) return rc; assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP), @@ -1955,7 +1937,7 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt) static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) { int seg = ctxt->src2.val; - unsigned long selector; + unsigned long selector = 0; int rc; rc = emulate_pop(ctxt, &selector, 2); @@ -2001,7 +1983,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RDI; - u32 val; + u32 val = 0; while (reg >= VCPU_REGS_RAX) { if (reg == VCPU_REGS_RSP) { @@ -2230,7 +2212,7 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) static int em_ret(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned long eip; + unsigned long eip = 0; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2242,7 +2224,8 @@ static int em_ret(struct x86_emulate_ctxt *ctxt) static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned long eip, cs; + unsigned long eip = 0; + unsigned long cs = 0; int cpl = ctxt->ops->cpl(ctxt); struct desc_struct new_desc; @@ -2283,7 +2266,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; ctxt->src.val = ctxt->dst.orig_val; - fastop(ctxt, em_cmp); + em_cmp(ctxt); if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ @@ -2353,50 +2336,6 @@ setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss) ss->avl = 0; } -static bool vendor_intel(struct x86_emulate_ctxt *ctxt) -{ - u32 eax, ebx, ecx, edx; - - eax = ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); - return is_guest_vendor_intel(ebx, ecx, edx); -} - -static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) -{ - const struct x86_emulate_ops *ops = ctxt->ops; - u32 eax, ebx, ecx, edx; - - /* - * syscall should always be enabled in longmode - so only become - * vendor specific (cpuid) if other modes are active... - */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - return true; - - eax = 0x00000000; - ecx = 0x00000000; - ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); - /* - * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a - * 64bit guest with a 32bit compat-app running will #UD !! While this - * behaviour can be fixed (by emulating) into AMD response - CPUs of - * AMD can't behave like Intel. - */ - if (is_guest_vendor_intel(ebx, ecx, edx)) - return false; - - if (is_guest_vendor_amd(ebx, ecx, edx) || - is_guest_vendor_hygon(ebx, ecx, edx)) - return true; - - /* - * default: (not Intel, not AMD, not Hygon), apply Intel's - * stricter rules... - */ - return false; -} - static int em_syscall(struct x86_emulate_ctxt *ctxt) { const struct x86_emulate_ops *ops = ctxt->ops; @@ -2410,7 +2349,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); - if (!(em_syscall_is_enabled(ctxt))) + /* + * Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas + * AMD allows SYSCALL in any flavor of protected mode. Note, it's + * infeasible to emulate Intel behavior when running on AMD hardware, + * as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD + * for KVM to trap-and-emulate, unlike emulating AMD on Intel. + */ + if (ctxt->mode != X86EMUL_MODE_PROT64 && + ctxt->ops->guest_cpuid_is_intel_compatible(ctxt)) return emulate_ud(ctxt); ops->get_msr(ctxt, MSR_EFER, &efer); @@ -2470,11 +2417,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) return emulate_gp(ctxt, 0); /* - * Not recognized on AMD in compat mode (but is recognized in legacy - * mode). + * Intel's architecture allows SYSENTER in compatibility mode, but AMD + * does not. Note, AMD does allow SYSENTER in legacy protected mode. */ - if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) - && !vendor_intel(ctxt)) + if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) && + !ctxt->ops->guest_cpuid_is_intel_compatible(ctxt)) return emulate_ud(ctxt); /* sysenter/sysexit have not been tested in 64bit mode. */ @@ -2524,7 +2471,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) setup_syscalls_segments(&cs, &ss); - if ((ctxt->rex_prefix & 0x8) != 0x0) + if (ctxt->rex_bits & REX_W) usermode = X86EMUL_MODE_PROT64; else usermode = X86EMUL_MODE_PROT32; @@ -2551,8 +2498,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (emul_is_noncanonical_address(rcx, ctxt) || - emul_is_noncanonical_address(rdx, ctxt)) + if (emul_is_noncanonical_address(rcx, ctxt, 0) || + emul_is_noncanonical_address(rdx, ctxt, 0)) return emulate_gp(ctxt, 0); break; } @@ -2646,7 +2593,14 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) * manner when ECX is zero due to REP-string optimizations. */ #ifdef CONFIG_X86_64 - if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt)) + u32 eax, ebx, ecx, edx; + + if (ctxt->ad_bytes != 4) + return; + + eax = ecx = 0; + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); + if (!is_guest_vendor_intel(ebx, ecx, edx)) return; *reg_write(ctxt, VCPU_REGS_RCX) = 0; @@ -3012,7 +2966,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, ret = em_push(ctxt); } - ops->get_dr(ctxt, 7, &dr7); + dr7 = ops->get_dr(ctxt, 7); ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN)); return ret; @@ -3077,7 +3031,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); if (cf) ctxt->eflags |= X86_EFLAGS_CF; @@ -3103,7 +3057,7 @@ static int em_aam(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3121,7 +3075,7 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3185,7 +3139,7 @@ fail: static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned long eip; + unsigned long eip = 0; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -3212,7 +3166,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) static int em_imul_3op(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = ctxt->src2.val; - return fastop(ctxt, em_imul); + return em_imul(ctxt); } static int em_cwd(struct x86_emulate_ctxt *ctxt) @@ -3441,8 +3395,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) { int rc; ulong linear; + unsigned int max_size; - rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear); + rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode, + &linear, X86EMUL_F_INVLPG); if (rc == X86EMUL_CONTINUE) ctxt->ops->invlpg(ctxt, linear); /* Disable writeback. */ @@ -3520,7 +3476,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - emul_is_noncanonical_address(desc_ptr.address, ctxt)) + emul_is_noncanonical_address(desc_ptr.address, ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); @@ -3865,15 +3822,6 @@ static int check_cr_access(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) -{ - unsigned long dr7; - - ctxt->ops->get_dr(ctxt, 7, &dr7); - - return dr7 & DR7_GD; -} - static int check_dr_read(struct x86_emulate_ctxt *ctxt) { int dr = ctxt->modrm_reg; @@ -3886,10 +3834,10 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); - if (check_dr7_gd(ctxt)) { + if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) { ulong dr6; - ctxt->ops->get_dr(ctxt, 6, &dr6); + dr6 = ctxt->ops->get_dr(ctxt, 6); dr6 &= ~DR_TRAP_BITS; dr6 |= DR6_BD | DR6_ACTIVE_LOW; ctxt->ops->set_dr(ctxt, 6, dr6); @@ -3961,7 +3909,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) * protected mode. */ if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || - ctxt->ops->check_pmc(ctxt, rcx)) + ctxt->ops->check_rdpmc_early(ctxt, rcx)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -3997,7 +3945,6 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } -#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ @@ -4012,9 +3959,11 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I2bvIP(_f, _e, _i, _p) \ IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ - F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ - F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) + +static const struct opcode ud = I(SrcNone, emulate_ud); static const struct opcode group7_rm0[] = { N, @@ -4052,14 +4001,14 @@ static const struct opcode group7_rm7[] = { }; static const struct opcode group1[] = { - F(Lock, em_add), - F(Lock | PageTable, em_or), - F(Lock, em_adc), - F(Lock, em_sbb), - F(Lock | PageTable, em_and), - F(Lock, em_sub), - F(Lock, em_xor), - F(NoWrite, em_cmp), + I(Lock, em_add), + I(Lock | PageTable, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock | PageTable, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -4067,38 +4016,38 @@ static const struct opcode group1A[] = { }; static const struct opcode group2[] = { - F(DstMem | ModRM, em_rol), - F(DstMem | ModRM, em_ror), - F(DstMem | ModRM, em_rcl), - F(DstMem | ModRM, em_rcr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_shr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_sar), + I(DstMem | ModRM, em_rol), + I(DstMem | ModRM, em_ror), + I(DstMem | ModRM, em_rcl), + I(DstMem | ModRM, em_rcr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_shr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_sar), }; static const struct opcode group3[] = { - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcNone | Lock, em_not), - F(DstMem | SrcNone | Lock, em_neg), - F(DstXacc | Src2Mem, em_mul_ex), - F(DstXacc | Src2Mem, em_imul_ex), - F(DstXacc | Src2Mem, em_div_ex), - F(DstXacc | Src2Mem, em_idiv_ex), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcNone | Lock, em_not), + I(DstMem | SrcNone | Lock, em_neg), + I(DstXacc | Src2Mem, em_mul_ex), + I(DstXacc | Src2Mem, em_imul_ex), + I(DstXacc | Src2Mem, em_div_ex), + I(DstXacc | Src2Mem, em_idiv_ex), }; static const struct opcode group4[] = { - F(ByteOp | DstMem | SrcNone | Lock, em_inc), - F(ByteOp | DstMem | SrcNone | Lock, em_dec), + I(ByteOp | DstMem | SrcNone | Lock, em_inc), + I(ByteOp | DstMem | SrcNone | Lock, em_dec), N, N, N, N, N, N, }; static const struct opcode group5[] = { - F(DstMem | SrcNone | Lock, em_inc), - F(DstMem | SrcNone | Lock, em_dec), - I(SrcMem | NearBranch | IsBranch, em_call_near_abs), - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far), + I(DstMem | SrcNone | Lock, em_inc), + I(DstMem | SrcNone | Lock, em_dec), + I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs), + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far), I(SrcMem | NearBranch | IsBranch, em_jmp_abs), I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), @@ -4132,10 +4081,10 @@ static const struct group_dual group7 = { { static const struct opcode group8[] = { N, N, N, N, - F(DstMem | SrcImmByte | NoWrite, em_bt), - F(DstMem | SrcImmByte | Lock | PageTable, em_bts), - F(DstMem | SrcImmByte | Lock, em_btr), - F(DstMem | SrcImmByte | Lock | PageTable, em_btc), + I(DstMem | SrcImmByte | NoWrite, em_bt), + I(DstMem | SrcImmByte | Lock | PageTable, em_bts), + I(DstMem | SrcImmByte | Lock, em_btr), + I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; /* @@ -4172,7 +4121,7 @@ static const struct group_dual group15 = { { } }; static const struct gprefix pfx_0f_6f_0f_7f = { - I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), + I(Mmx, em_mov), I(Sse | Avx | Aligned, em_mov), N, I(Sse | Avx | Unaligned, em_mov), }; static const struct instr_dual instr_dual_0f_2b = { @@ -4191,8 +4140,8 @@ static const struct gprefix pfx_0f_28_0f_29 = { I(Aligned, em_mov), I(Aligned, em_mov), N, N, }; -static const struct gprefix pfx_0f_e7 = { - N, I(Sse, em_mov), N, N, +static const struct gprefix pfx_0f_e7_0f_38_2a = { + N, I(Sse | Avx, em_mov), N, N, }; static const struct escape escape_d9 = { { @@ -4272,31 +4221,31 @@ static const struct instr_dual instr_dual_8d = { static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - F6ALU(Lock, em_add), + I6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), /* 0x08 - 0x0F */ - F6ALU(Lock | PageTable, em_or), + I6ALU(Lock | PageTable, em_or), I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), N, /* 0x10 - 0x17 */ - F6ALU(Lock, em_adc), + I6ALU(Lock, em_adc), I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), /* 0x18 - 0x1F */ - F6ALU(Lock, em_sbb), + I6ALU(Lock, em_sbb), I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), /* 0x20 - 0x27 */ - F6ALU(Lock | PageTable, em_and), N, N, + I6ALU(Lock | PageTable, em_and), N, N, /* 0x28 - 0x2F */ - F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - F6ALU(Lock, em_xor), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - F6ALU(NoWrite, em_cmp), N, N, + I6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ - X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), + X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ @@ -4320,7 +4269,7 @@ static const struct opcode opcode_table[256] = { G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -4333,7 +4282,7 @@ static const struct opcode opcode_table[256] = { DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), - I(SrcImmFAddr | No64 | IsBranch, em_call_far), N, + I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N, II(ImplicitOps | Stack, em_pushf, pushf), II(ImplicitOps | Stack, em_popf, popf), I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf), @@ -4341,37 +4290,37 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), + I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), /* 0xA8 - 0xAF */ - F2bv(DstAcc | SrcImm | NoWrite, em_test), + I2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm), - I(ImplicitOps | NearBranch | IsBranch, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ - I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter), - I(Stack | IsBranch, em_leave), - I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm), - I(ImplicitOps | IsBranch, em_ret_far), - D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn), + I(Stack | SrcImmU16 | Src2ImmByte, em_enter), + I(Stack, em_leave), + I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm), + I(ImplicitOps | IsBranch | ShadowStack, em_ret_far), + D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn), D(ImplicitOps | No64 | IsBranch), - II(ImplicitOps | IsBranch, em_iret, iret), + II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret), /* 0xD0 - 0xD7 */ G(Src2One | ByteOp, group2), G(Src2One, group2), G(Src2CL | ByteOp, group2), G(Src2CL, group2), I(DstAcc | SrcImmUByte | No64, em_aam), I(DstAcc | SrcImmUByte | No64, em_aad), - F(DstAcc | ByteOp | No64, em_salc), + I(DstAcc | ByteOp | No64, em_salc), I(DstAcc | SrcXLat | ByteOp, em_mov), /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, @@ -4381,7 +4330,7 @@ static const struct opcode opcode_table[256] = { I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), /* 0xE8 - 0xEF */ - I(SrcImm | NearBranch | IsBranch, em_call), + I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call), D(SrcImm | ImplicitOps | NearBranch | IsBranch), I(SrcImmFAddr | No64 | IsBranch, em_jmp_far), D(SrcImmByte | ImplicitOps | NearBranch | IsBranch), @@ -4400,13 +4349,13 @@ static const struct opcode opcode_table[256] = { static const struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ G(0, group6), GD(0, &group7), N, N, - N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall), + N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall), II(ImplicitOps | Priv, em_clts, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, /* 0x10 - 0x1F */ - GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11), - GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11), + GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_10_0f_11), + GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_10_0f_11), N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */ D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, @@ -4422,17 +4371,17 @@ static const struct opcode twobyte_table[256] = { IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write, check_dr_write), N, N, N, N, - GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), - GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), - N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b), + GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_28_0f_29), + GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_28_0f_29), + N, GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_2b), N, N, N, N, /* 0x30 - 0x3F */ II(ImplicitOps | Priv, em_wrmsr, wrmsr), IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), II(ImplicitOps | Priv, em_rdmsr, rdmsr), IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), - I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter), - I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit), + I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter), + I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit), N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ @@ -4456,32 +4405,32 @@ static const struct opcode twobyte_table[256] = { /* 0xA0 - 0xA7 */ I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), II(ImplicitOps, em_cpuid, cpuid), - F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), - F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, + I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), + I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), II(EmulateOnUD | ImplicitOps, em_rsm, rsm), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), - F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), - GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), + I(DstMem | SrcReg | Src2CL | ModRM, em_shrd), + GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), - F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), + I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xB8 - 0xBF */ N, N, G(BitOp, group8), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), I(DstReg | SrcMem | ModRM, em_bsf_c), I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ - F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), + I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ @@ -4489,7 +4438,7 @@ static const struct opcode twobyte_table[256] = { /* 0xD0 - 0xDF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0xE0 - 0xEF */ - N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7), + N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7_0f_38_2a), N, N, N, N, N, N, N, N, /* 0xF0 - 0xFF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N @@ -4504,11 +4453,11 @@ static const struct instr_dual instr_dual_0f_38_f1 = { }; static const struct gprefix three_byte_0f_38_f0 = { - ID(0, &instr_dual_0f_38_f0), N, N, N + ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N }; static const struct gprefix three_byte_0f_38_f1 = { - ID(0, &instr_dual_0f_38_f1), N, N, N + ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N }; /* @@ -4516,8 +4465,13 @@ static const struct gprefix three_byte_0f_38_f1 = { * byte. */ static const struct opcode opcode_map_0f_38[256] = { - /* 0x00 - 0x7f */ - X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), + /* 0x00 - 0x1f */ + X16(N), X16(N), + /* 0x20 - 0x2f */ + X8(N), + X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N, + /* 0x30 - 0x7f */ + X16(N), X16(N), X16(N), X16(N), X16(N), /* 0x80 - 0xef */ X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), /* 0xf0 - 0xf1 */ @@ -4543,6 +4497,60 @@ static const struct opcode opcode_map_0f_38[256] = { #undef I2bvIP #undef I6ALU +static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt) +{ + return ctxt->d & ShadowStack; +} + +static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt) +{ + u64 flags = ctxt->d; + + if (!(flags & IsBranch)) + return false; + + /* + * All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are + * indirect and thus affect IBT state. All far RETs (including SYSEXIT + * and IRET) are protected via Shadow Stacks and thus don't affect IBT + * state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is + * enabled, but that should be handled by IRET emulation (in the very + * unlikely scenario that KVM adds support for fully emulating IRET). + */ + if (!(flags & NearBranch)) + return ctxt->execute != em_iret && + ctxt->execute != em_ret_far && + ctxt->execute != em_ret_far_imm && + ctxt->execute != em_sysexit; + + switch (flags & SrcMask) { + case SrcReg: + case SrcMem: + case SrcMem16: + case SrcMem32: + return true; + case SrcMemFAddr: + case SrcImmFAddr: + /* Far branches should be handled above. */ + WARN_ON_ONCE(1); + return true; + case SrcNone: + case SrcImm: + case SrcImmByte: + /* + * Note, ImmU16 is used only for the stack adjustment operand on ENTER + * and RET instructions. ENTER isn't a branch and RET FAR is handled + * by the NearBranch check above. RET itself isn't an indirect branch. + */ + case SrcImmU16: + return false; + default: + WARN_ONCE(1, "Unexpected Src operand '%llx' on branch", + flags & SrcMask); + return false; + } +} + static unsigned imm_size(struct x86_emulate_ctxt *ctxt) { unsigned size; @@ -4622,14 +4630,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); fetch_register_operand(op); - op->orig_val = op->val; break; case OpAccLo: op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes; op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); fetch_register_operand(op); - op->orig_val = op->val; break; case OpAccHi: if (ctxt->d & ByteOp) { @@ -4640,7 +4646,6 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->bytes = ctxt->op_bytes; op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); fetch_register_operand(op); - op->orig_val = op->val; break; case OpDI: op->type = OP_MEM; @@ -4759,12 +4764,87 @@ done: return rc; } +static int x86_decode_avx(struct x86_emulate_ctxt *ctxt, + u8 vex_1st, u8 vex_2nd, struct opcode *opcode) +{ + u8 vex_3rd, map, pp, l, v; + int rc = X86EMUL_CONTINUE; + + if (ctxt->rep_prefix || ctxt->op_prefix || ctxt->rex_prefix) + goto ud; + + if (vex_1st == 0xc5) { + /* Expand RVVVVlpp to VEX3 format */ + vex_3rd = vex_2nd & ~0x80; /* VVVVlpp from VEX2, w=0 */ + vex_2nd = (vex_2nd & 0x80) | 0x61; /* R from VEX2, X=1 B=1 mmmmm=00001 */ + } else { + vex_3rd = insn_fetch(u8, ctxt); + } + + /* vex_2nd = RXBmmmmm, vex_3rd = wVVVVlpp. Fix polarity */ + vex_2nd ^= 0xE0; /* binary 11100000 */ + vex_3rd ^= 0x78; /* binary 01111000 */ + + ctxt->rex_prefix = REX_PREFIX; + ctxt->rex_bits = (vex_2nd & 0xE0) >> 5; /* RXB */ + ctxt->rex_bits |= (vex_3rd & 0x80) >> 4; /* w */ + if (ctxt->rex_bits && ctxt->mode != X86EMUL_MODE_PROT64) + goto ud; + + map = vex_2nd & 0x1f; + v = (vex_3rd >> 3) & 0xf; + l = vex_3rd & 0x4; + pp = vex_3rd & 0x3; + + ctxt->b = insn_fetch(u8, ctxt); + switch (map) { + case 1: + ctxt->opcode_len = 2; + *opcode = twobyte_table[ctxt->b]; + break; + case 2: + ctxt->opcode_len = 3; + *opcode = opcode_map_0f_38[ctxt->b]; + break; + case 3: + /* no 0f 3a instructions are supported yet */ + return X86EMUL_UNHANDLEABLE; + default: + goto ud; + } + + /* + * No three operand instructions are supported yet; those that + * *are* marked with the Avx flag reserve the VVVV flag. + */ + if (v) + goto ud; + + if (l) + ctxt->op_bytes = 32; + else + ctxt->op_bytes = 16; + + switch (pp) { + case 0: break; + case 1: ctxt->op_prefix = true; break; + case 2: ctxt->rep_prefix = 0xf3; break; + case 3: ctxt->rep_prefix = 0xf2; break; + } + +done: + return rc; +ud: + *opcode = ud; + return rc; +} + int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type) { int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, goffset, simd_prefix; - bool op_prefix = false; + bool vex_prefix = false; bool has_seg_override = false; struct opcode opcode; u16 dummy; @@ -4816,7 +4896,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int for (;;) { switch (ctxt->b = insn_fetch(u8, ctxt)) { case 0x66: /* operand-size override */ - op_prefix = true; + ctxt->op_prefix = true; /* switch between 2/4 bytes */ ctxt->op_bytes = def_op_bytes ^ 6; break; @@ -4855,7 +4935,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) goto done_prefixes; - ctxt->rex_prefix = ctxt->b; + ctxt->rex_prefix = REX_PREFIX; + ctxt->rex_bits = ctxt->b & 0xf; continue; case 0xf0: /* LOCK */ ctxt->lock_prefix = 1; @@ -4869,20 +4950,33 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int } /* Any legacy prefix after a REX prefix nullifies its effect. */ - - ctxt->rex_prefix = 0; + ctxt->rex_prefix = REX_NONE; + ctxt->rex_bits = 0; } done_prefixes: /* REX prefix. */ - if (ctxt->rex_prefix & 8) - ctxt->op_bytes = 8; /* REX.W */ + if (ctxt->rex_bits & REX_W) + ctxt->op_bytes = 8; /* Opcode byte(s). */ - opcode = opcode_table[ctxt->b]; - /* Two-byte opcode? */ - if (ctxt->b == 0x0f) { + if (ctxt->b == 0xc4 || ctxt->b == 0xc5) { + /* VEX or LDS/LES */ + u8 vex_2nd = insn_fetch(u8, ctxt); + if (mode != X86EMUL_MODE_PROT64 && (vex_2nd & 0xc0) != 0xc0) { + opcode = opcode_table[ctxt->b]; + ctxt->modrm = vex_2nd; + /* the Mod/RM byte has been fetched already! */ + goto done_modrm; + } + + vex_prefix = true; + rc = x86_decode_avx(ctxt, ctxt->b, vex_2nd, &opcode); + if (rc != X86EMUL_CONTINUE) + goto done; + } else if (ctxt->b == 0x0f) { + /* Two- or three-byte opcode */ ctxt->opcode_len = 2; ctxt->b = insn_fetch(u8, ctxt); opcode = twobyte_table[ctxt->b]; @@ -4893,18 +4987,16 @@ done_prefixes: ctxt->b = insn_fetch(u8, ctxt); opcode = opcode_map_0f_38[ctxt->b]; } + } else { + /* Opcode byte(s). */ + opcode = opcode_table[ctxt->b]; } - ctxt->d = opcode.flags; - if (ctxt->d & ModRM) + if (opcode.flags & ModRM) ctxt->modrm = insn_fetch(u8, ctxt); - /* vex-prefix instructions are not implemented */ - if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && - (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) { - ctxt->d = NotImpl; - } - +done_modrm: + ctxt->d = opcode.flags; while (ctxt->d & GroupMask) { switch (ctxt->d & GroupMask) { case Group: @@ -4923,9 +5015,9 @@ done_prefixes: opcode = opcode.u.group[goffset]; break; case Prefix: - if (ctxt->rep_prefix && op_prefix) + if (ctxt->rep_prefix && ctxt->op_prefix) return EMULATION_FAILED; - simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; + simd_prefix = ctxt->op_prefix ? 0x66 : ctxt->rep_prefix; switch (simd_prefix) { case 0x00: opcode = opcode.u.gprefix->pfx_no; break; case 0x66: opcode = opcode.u.gprefix->pfx_66; break; @@ -4970,8 +5062,55 @@ done_prefixes: if (ctxt->d == 0) return EMULATION_FAILED; + if (unlikely(vex_prefix)) { + /* + * Only specifically marked instructions support VEX. Since many + * instructions support it but are not annotated, return not implemented + * rather than #UD. + */ + if (!(ctxt->d & Avx)) + return EMULATION_FAILED; + + if (!(ctxt->d & AlignMask)) + ctxt->d |= Unaligned; + } + ctxt->execute = opcode.u.execute; + /* + * Reject emulation if KVM might need to emulate shadow stack updates + * and/or indirect branch tracking enforcement, which the emulator + * doesn't support. + */ + if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) && + ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) { + u64 u_cet = 0, s_cet = 0; + + /* + * Check both User and Supervisor on far transfers as inter- + * privilege level transfers are impacted by CET at the target + * privilege level, and that is not known at this time. The + * expectation is that the guest will not require emulation of + * any CET-affected instructions at any privilege level. + */ + if (!(ctxt->d & NearBranch)) + u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN; + else if (ctxt->ops->cpl(ctxt) == 3) + u_cet = CET_SHSTK_EN | CET_ENDBR_EN; + else + s_cet = CET_SHSTK_EN | CET_ENDBR_EN; + + if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) || + (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))) + return EMULATION_FAILED; + + if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt)) + return EMULATION_FAILED; + + if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt)) + return EMULATION_FAILED; + } + if (unlikely(emulation_type & EMULTYPE_TRAP_UD) && likely(!(ctxt->d & EmulateOnUD))) return EMULATION_FAILED; @@ -5006,8 +5145,10 @@ done_prefixes: if ((ctxt->d & No16) && ctxt->op_bytes == 2) ctxt->op_bytes = 4; - if (ctxt->d & Sse) - ctxt->op_bytes = 16; + if (vex_prefix) + ; + else if (ctxt->d & Sse) + ctxt->op_bytes = 16, ctxt->d &= ~Avx; else if (ctxt->d & Mmx) ctxt->op_bytes = 8; } @@ -5103,30 +5244,14 @@ static void fetch_possible_mmx_operand(struct operand *op) kvm_read_mmx_reg(op->addr.mm, &op->mm_val); } -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) -{ - ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; - - if (!(ctxt->d & ByteOp)) - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - - asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" - : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT - : "c"(ctxt->src2.val)); - - ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); - if (!fop) /* exception is returned in fop variable */ - return emulate_de(ctxt); - return X86EMUL_CONTINUE; -} - void init_decode_cache(struct x86_emulate_ctxt *ctxt) { /* Clear fields that are set conditionally but read without a guard. */ ctxt->rip_relative = false; - ctxt->rex_prefix = 0; + ctxt->rex_prefix = REX_NONE; + ctxt->rex_bits = 0; ctxt->lock_prefix = 0; + ctxt->op_prefix = false; ctxt->rep_prefix = 0; ctxt->regs_valid = 0; ctxt->regs_dirty = 0; @@ -5136,12 +5261,11 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.end = 0; } -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) { const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; - bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt); ctxt->mem_read.pos = 0; @@ -5157,20 +5281,34 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if (unlikely(ctxt->d & - (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { + (No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || (ctxt->d & Undefined)) { rc = emulate_ud(ctxt); goto done; } - if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) - || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) { rc = emulate_ud(ctxt); goto done; } - if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + if (ctxt->d & Avx) { + u64 xcr = 0; + if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE) + || ops->get_xcr(ctxt, 0, &xcr) + || !(xcr & XFEATURE_MASK_YMM)) { + rc = emulate_ud(ctxt); + goto done; + } + } else if (ctxt->d & Sse) { + if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) { + rc = emulate_ud(ctxt); + goto done; + } + } + + if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { rc = emulate_nm(ctxt); goto done; } @@ -5189,7 +5327,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(&ctxt->dst); } - if (unlikely(is_guest_mode) && ctxt->intercept) { + if (unlikely(check_intercepts) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5218,7 +5356,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5272,7 +5410,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) @@ -5285,10 +5423,7 @@ special_insn: ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { - if (ctxt->d & Fastop) - rc = fastop(ctxt, ctxt->fop); - else - rc = ctxt->execute(ctxt); + rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; goto writeback; @@ -5448,7 +5583,7 @@ twobyte_insn: ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg); break; case 0x21: /* mov from dr to reg */ - ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); + ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg); break; case 0x40 ... 0x4f: /* cmov */ if (test_cc(ctxt->b, ctxt->eflags)) |
