Diffstat (limited to 'arch/x86/kvm/emulate.c')
| Mode | File | Lines changed |
|------------|------------------------|------|
| -rw-r--r-- | arch/x86/kvm/emulate.c | 2075 |
1 file changed, 913 insertions(+), 1162 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d0e2825ae617..c8e292e9a24d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -17,14 +17,16 @@ * * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "kvm_cache_regs.h" #include "kvm_emulate.h" #include <linux/stringify.h> -#include <asm/fpu/api.h> #include <asm/debugreg.h> #include <asm/nospec-branch.h> +#include <asm/ibt.h> +#include <asm/text-patching.h> #include "x86.h" #include "tss.h" @@ -79,9 +81,8 @@ */ /* Operand sizes: 8-bit operands or specified/overridden size. */ -#define ByteOp (1<<0) /* 8-bit operands. */ -/* Destination operand type. */ -#define DstShift 1 +#define ByteOp (1<<0) /* 8-bit operands. */ +#define DstShift 1 /* Destination operand type at bits 1-5 */ #define ImplicitOps (OpImplicit << DstShift) #define DstReg (OpReg << DstShift) #define DstMem (OpMem << DstShift) @@ -93,8 +94,7 @@ #define DstDX (OpDX << DstShift) #define DstAccLo (OpAccLo << DstShift) #define DstMask (OpMask << DstShift) -/* Source operand type. */ -#define SrcShift 6 +#define SrcShift 6 /* Source operand type at bits 6-10 */ #define SrcNone (OpNone << SrcShift) #define SrcReg (OpReg << SrcShift) #define SrcMem (OpMem << SrcShift) @@ -117,10 +117,10 @@ #define SrcAccHi (OpAccHi << SrcShift) #define SrcMask (OpMask << SrcShift) #define BitOp (1<<11) -#define MemAbs (1<<12) /* Memory operand is absolute displacement */ +#define MemAbs (1<<12) /* Memory operand is absolute displacement */ #define String (1<<13) /* String instruction (rep capable) */ #define Stack (1<<14) /* Stack instruction (push/pop) */ -#define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */ +#define GroupMask (7<<15) /* Group mechanisms, at bits 15-17 */ #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ @@ -129,11 +129,8 @@ #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ #define ModeDual (7<<15) /* Different instruction for 32/64 bit */ #define Sse (1<<18) /* SSE Vector instruction */ -/* Generic ModRM decode. */ -#define ModRM (1<<19) -/* Destination is only written; never read. */ -#define Mov (1<<20) -/* Misc flags */ +#define ModRM (1<<19) /* Generic ModRM decode. */ +#define Mov (1<<20) /* Destination is only written; never read. 
*/ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ @@ -141,11 +138,11 @@ #define Undefined (1<<25) /* No Such Instruction */ #define Lock (1<<26) /* lock prefix is allowed for the instruction */ #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ -#define No64 (1<<28) +#define No64 (1<<28) /* Instruction generates #UD in 64-bit mode */ #define PageTable (1 << 29) /* instruction used to write page table */ #define NotImpl (1 << 30) /* instruction is not implemented */ -/* Source 2 operand type */ -#define Src2Shift (31) +#define Avx ((u64)1 << 31) /* Instruction uses VEX prefix */ +#define Src2Shift (32) /* Source 2 operand type at bits 32-36 */ #define Src2None (OpNone << Src2Shift) #define Src2Mem (OpMem << Src2Shift) #define Src2CL (OpCL << Src2Shift) @@ -159,13 +156,13 @@ #define Src2FS (OpFS << Src2Shift) #define Src2GS (OpGS << Src2Shift) #define Src2Mask (OpMask << Src2Shift) +/* free: 37-39 */ #define Mmx ((u64)1 << 40) /* MMX Vector instruction */ -#define AlignMask ((u64)7 << 41) +#define AlignMask ((u64)3 << 41) /* Memory alignment requirement at bits 41-42 */ #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */ -#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */ -#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ -#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ +#define Aligned16 ((u64)3 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ +/* free: 43-44 */ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ @@ -176,6 +173,8 @@ #define No16 ((u64)1 << 53) /* No 16 bit operand */ #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ +#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */ +#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -188,12 +187,10 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) 
X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE 8 - struct opcode { - u64 flags : 56; - u64 intercept : 8; + u64 flags; + u8 intercept; + u8 pad[7]; union { int (*execute)(struct x86_emulate_ctxt *ctxt); const struct opcode *group; @@ -202,7 +199,6 @@ struct opcode { const struct escape *esc; const struct instr_dual *idual; const struct mode_dual *mdual; - void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -243,33 +239,19 @@ enum x86_transfer_type { X86_TRANSFER_TASK_SWITCH, }; -static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - if (!(ctxt->regs_valid & (1 << nr))) { - ctxt->regs_valid |= 1 << nr; - ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); - } - return ctxt->_regs[nr]; -} - -static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - ctxt->regs_valid |= 1 << nr; - ctxt->regs_dirty |= 1 << nr; - return &ctxt->_regs[nr]; -} - -static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - reg_read(ctxt, nr); - return reg_write(ctxt, nr); -} +enum rex_bits { + REX_B = 1, + REX_X = 2, + REX_R = 4, + REX_W = 8, +}; static void writeback_registers(struct x86_emulate_ctxt *ctxt) { + unsigned long dirty = ctxt->regs_dirty; unsigned reg; - for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) + for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS) ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]); } @@ -287,187 +269,134 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 -#define ON64(x) x +#define ON64(x...) x #else -#define ON64(x) +#define ON64(x...) #endif -/* - * fastop functions have a special calling convention: - * - * dst: rax (in/out) - * src: rdx (in/out) - * src2: rcx (in) - * flags: rflags (in/out) - * ex: rsi (in:fastop pointer, out:zero if exception) - * - * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for - * different operand sizes can be reached by calculation, rather than a jump - * table (which would be bigger than the code). 
- */ -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); - -#define __FOP_FUNC(name) \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ - ".type " name ", @function \n\t" \ - name ":\n\t" - -#define FOP_FUNC(name) \ - __FOP_FUNC(#name) - -#define __FOP_RET(name) \ - "ret \n\t" \ - ".size " name ", .-" name "\n\t" - -#define FOP_RET(name) \ - __FOP_RET(#name) - -#define FOP_START(op) \ - extern void em_##op(struct fastop *fake); \ - asm(".pushsection .text, \"ax\" \n\t" \ - ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ - "em_" #op ":\n\t" - -#define FOP_END \ - ".popsection") - -#define __FOPNOP(name) \ - __FOP_FUNC(name) \ - __FOP_RET(name) - -#define FOPNOP() \ - __FOPNOP(__stringify(__UNIQUE_ID(nop))) - -#define FOP1E(op, dst) \ - __FOP_FUNC(#op "_" #dst) \ - "10: " #op " %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst) - -#define FOP1EEX(op, dst) \ - FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception) - -#define FASTOP1(op) \ - FOP_START(op) \ - FOP1E(op##b, al) \ - FOP1E(op##w, ax) \ - FOP1E(op##l, eax) \ - ON64(FOP1E(op##q, rax)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m) */ -#define FASTOP1SRC2(op, name) \ - FOP_START(name) \ - FOP1E(op, cl) \ - FOP1E(op, cx) \ - FOP1E(op, ecx) \ - ON64(FOP1E(op, rcx)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ -#define FASTOP1SRC2EX(op, name) \ - FOP_START(name) \ - FOP1EEX(op, cl) \ - FOP1EEX(op, cx) \ - FOP1EEX(op, ecx) \ - ON64(FOP1EEX(op, rcx)) \ - FOP_END - -#define FOP2E(op, dst, src) \ - __FOP_FUNC(#op "_" #dst "_" #src) \ - #op " %" #src ", %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst "_" #src) - -#define FASTOP2(op) \ - FOP_START(op) \ - FOP2E(op##b, al, dl) \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, word only */ -#define FASTOP2W(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, src is CL */ -#define FASTOP2CL(op) \ - FOP_START(op) \ - FOP2E(op##b, al, cl) \ - FOP2E(op##w, ax, cl) \ - FOP2E(op##l, eax, cl) \ - ON64(FOP2E(op##q, rax, cl)) \ - FOP_END - -/* 2 operand, src and dest are reversed */ -#define FASTOP2R(op, name) \ - FOP_START(name) \ - FOP2E(op##b, dl, al) \ - FOP2E(op##w, dx, ax) \ - FOP2E(op##l, edx, eax) \ - ON64(FOP2E(op##q, rdx, rax)) \ - FOP_END - -#define FOP3E(op, dst, src, src2) \ - __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ - #op " %" #src2 ", %" #src ", %" #dst " \n\t"\ - __FOP_RET(#op "_" #dst "_" #src "_" #src2) - -/* 3-operand, word-only, src2=cl */ -#define FASTOP3WCL(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP3E(op##w, ax, dx, cl) \ - FOP3E(op##l, eax, edx, cl) \ - ON64(FOP3E(op##q, rax, rdx, cl)) \ - FOP_END - -/* Special case for SETcc - 1 instruction per cc */ -#define FOP_SETCC(op) \ - ".align 4 \n\t" \ - ".type " #op ", @function \n\t" \ - #op ": \n\t" \ - #op " %al \n\t" \ - __FOP_RET(#op) - -asm(".pushsection .fixup, \"ax\"\n" - ".global kvm_fastop_exception \n" - "kvm_fastop_exception: xor %esi, %esi; ret\n" - ".popsection"); - -FOP_START(setcc) -FOP_SETCC(seto) -FOP_SETCC(setno) -FOP_SETCC(setc) -FOP_SETCC(setnc) -FOP_SETCC(setz) -FOP_SETCC(setnz) -FOP_SETCC(setbe) -FOP_SETCC(setnbe) -FOP_SETCC(sets) -FOP_SETCC(setns) -FOP_SETCC(setp) -FOP_SETCC(setnp) -FOP_SETCC(setl) -FOP_SETCC(setnl) -FOP_SETCC(setle) -FOP_SETCC(setnle) -FOP_END; - -FOP_START(salc) -FOP_FUNC(salc) -"pushf; sbb %al, %al; popf \n\t" -FOP_RET(salc) -FOP_END; +#define EM_ASM_START(op) \ 
+static int em_##op(struct x86_emulate_ctxt *ctxt) \ +{ \ + unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \ + int bytes = 1, ok = 1; \ + if (!(ctxt->d & ByteOp)) \ + bytes = ctxt->dst.bytes; \ + switch (bytes) { + +#define __EM_ASM(str) \ + asm("push %[flags]; popf \n\t" \ + "10: " str \ + "pushf; pop %[flags] \n\t" \ + "11: \n\t" \ + : "+a" (ctxt->dst.val), \ + "+d" (ctxt->src.val), \ + [flags] "+D" (flags), \ + "+S" (ok) \ + : "c" (ctxt->src2.val)) + +#define __EM_ASM_1(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t") + +#define __EM_ASM_1_EX(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t" \ + _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi)) + +#define __EM_ASM_2(op, dst, src) \ + __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") + +#define __EM_ASM_3(op, dst, src, src2) \ + __EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t") + +#define EM_ASM_END \ + } \ + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \ + return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE; \ +} + +/* 1-operand, using "a" (dst) */ +#define EM_ASM_1(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_1(op##b, al); break; \ + case 2: __EM_ASM_1(op##w, ax); break; \ + case 4: __EM_ASM_1(op##l, eax); break; \ + ON64(case 8: __EM_ASM_1(op##q, rax); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) */ +#define EM_ASM_1SRC2(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1(op##b, cl); break; \ + case 2: __EM_ASM_1(op##w, cx); break; \ + case 4: __EM_ASM_1(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) with exception */ +#define EM_ASM_1SRC2EX(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1_EX(op##b, cl); break; \ + case 2: __EM_ASM_1_EX(op##w, cx); break; \ + case 4: __EM_ASM_1_EX(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst), "d" (src) */ +#define EM_ASM_2(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, dl); break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, reversed */ +#define EM_ASM_2R(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_2(op##b, dl, al); break; \ + case 2: __EM_ASM_2(op##w, dx, ax); break; \ + case 4: __EM_ASM_2(op##l, edx, eax); break; \ + ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \ + EM_ASM_END + +/* 2-operand, word only (no byte op) */ +#define EM_ASM_2W(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst) and CL (src2) */ +#define EM_ASM_2CL(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, cl); break; \ + case 2: __EM_ASM_2(op##w, ax, cl); break; \ + case 4: __EM_ASM_2(op##l, eax, cl); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \ + EM_ASM_END + +/* 3-operand, using "a" (dst), "d" (src) and CL (src2) */ +#define EM_ASM_3WCL(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \ + case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \ + ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \ + EM_ASM_END + +static int em_salc(struct x86_emulate_ctxt *ctxt) +{ + /* + * Set AL 0xFF if CF is set, or 0x00 when clear. 
+ */ + ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF); + return X86EMUL_CONTINUE; +} /* * XXX: inoutclob user must know where the argument is being expanded. - * Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault. + * Using asm goto would allow us to remove _fault. */ #define asm_safe(insn, inoutclob...) \ ({ \ @@ -475,12 +404,8 @@ FOP_END; \ asm volatile("1:" insn "\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: movl $1, %[_fault]\n" \ - " jmp 2b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [_fault] "+qm"(_fault) inoutclob ); \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \ + : [_fault] "+r"(_fault) inoutclob ); \ \ _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \ }) @@ -499,8 +424,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, .dst_val = ctxt->dst.val64, .src_bytes = ctxt->src.bytes, .dst_bytes = ctxt->dst.bytes, + .src_type = ctxt->src.type, + .dst_type = ctxt->dst.type, .ad_bytes = ctxt->ad_bytes, - .next_rip = ctxt->eip, + .rip = ctxt->eip, + .next_rip = ctxt->_eip, }; return ctxt->ops->intercept(ctxt, &info, stage); @@ -603,7 +531,9 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { - WARN_ON(vec > 0x1f); + if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt)) + return X86EMUL_UNHANDLEABLE; + ctxt->exception.vector = vec; ctxt->exception.error_code = error; ctxt->exception.error_code_valid = valid; @@ -671,9 +601,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) } static inline bool emul_is_noncanonical_address(u64 la, - struct x86_emulate_ctxt *ctxt) + struct x86_emulate_ctxt *ctxt, + unsigned int flags) { - return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; + return !ctxt->ops->is_canonical_addr(ctxt, la, flags); } /* @@ -694,7 +625,6 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size) switch (alignment) { case Unaligned: - case Avx: return 1; case Aligned16: return 16; @@ -707,8 +637,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size) static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, unsigned *max_size, unsigned size, - bool write, bool fetch, - enum x86emul_mode mode, ulong *linear) + enum x86emul_mode mode, ulong *linear, + unsigned int flags) { struct desc_struct desc; bool usable; @@ -721,9 +651,9 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: - *linear = la; + *linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags); va_bits = ctxt_virt_addr_bits(ctxt); - if (get_canonical(la, va_bits) != la) + if (!__is_canonical_address(la, va_bits)) goto bad; *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); @@ -737,11 +667,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, if (!usable) goto bad; /* code segment in protected mode or read-only data segment */ - if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) - || !(desc.type & 2)) && write) + if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) && + (flags & X86EMUL_F_WRITE)) goto bad; /* unreadable code segment */ - if (!fetch && (desc.type & 8) && !(desc.type & 2)) + if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); if (!(desc.type & 8) && (desc.type & 4)) { @@ -777,12 +707,11 @@ static int 
linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { unsigned max_size; - return __linearize(ctxt, addr, &max_size, size, write, false, - ctxt->mode, linear); + return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear, + write ? X86EMUL_F_WRITE : 0); } -static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, - enum x86emul_mode mode) +static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) { ulong linear; int rc; @@ -792,41 +721,72 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, if (ctxt->op_bytes != sizeof(unsigned long)) addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); - rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); + rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear, + X86EMUL_F_FETCH); if (rc == X86EMUL_CONTINUE) ctxt->_eip = addr.ea; return rc; } +static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt) +{ + u64 efer; + struct desc_struct cs; + u16 selector; + u32 base3; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) { + /* Real mode. cpu must not have long mode active */ + if (efer & EFER_LMA) + return X86EMUL_UNHANDLEABLE; + ctxt->mode = X86EMUL_MODE_REAL; + return X86EMUL_CONTINUE; + } + + if (ctxt->eflags & X86_EFLAGS_VM) { + /* Protected/VM86 mode. cpu must not have long mode active */ + if (efer & EFER_LMA) + return X86EMUL_UNHANDLEABLE; + ctxt->mode = X86EMUL_MODE_VM86; + return X86EMUL_CONTINUE; + } + + if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS)) + return X86EMUL_UNHANDLEABLE; + + if (efer & EFER_LMA) { + if (cs.l) { + /* Proper long mode */ + ctxt->mode = X86EMUL_MODE_PROT64; + } else if (cs.d) { + /* 32 bit compatibility mode*/ + ctxt->mode = X86EMUL_MODE_PROT32; + } else { + ctxt->mode = X86EMUL_MODE_PROT16; + } + } else { + /* Legacy 32 bit / 16 bit mode */ + ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + } + + return X86EMUL_CONTINUE; +} + static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) { - return assign_eip(ctxt, dst, ctxt->mode); + return assign_eip(ctxt, dst); } -static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, - const struct desc_struct *cs_desc) +static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) { - enum x86emul_mode mode = ctxt->mode; - int rc; + int rc = emulator_recalc_and_set_mode(ctxt); -#ifdef CONFIG_X86_64 - if (ctxt->mode >= X86EMUL_MODE_PROT16) { - if (cs_desc->l) { - u64 efer = 0; + if (rc != X86EMUL_CONTINUE) + return rc; - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) - mode = X86EMUL_MODE_PROT64; - } else - mode = X86EMUL_MODE_PROT32; /* temporary value */ - } -#endif - if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) - mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - rc = assign_eip(ctxt, dst, mode); - if (rc == X86EMUL_CONTINUE) - ctxt->mode = mode; - return rc; + return assign_eip(ctxt, dst); } static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) @@ -898,8 +858,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) * boundary check itself. Instead, we use max_size to check * against op_size. 
*/ - rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode, - &linear); + rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear, + X86EMUL_F_FETCH); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; @@ -966,7 +926,7 @@ static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, int byteop) { void *p; - int highbyte_regs = (ctxt->rex_prefix == 0) && byteop; + int highbyte_regs = (ctxt->rex_prefix == REX_NONE) && byteop; if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; @@ -992,56 +952,55 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } -FASTOP2(add); -FASTOP2(or); -FASTOP2(adc); -FASTOP2(sbb); -FASTOP2(and); -FASTOP2(sub); -FASTOP2(xor); -FASTOP2(cmp); -FASTOP2(test); - -FASTOP1SRC2(mul, mul_ex); -FASTOP1SRC2(imul, imul_ex); -FASTOP1SRC2EX(div, div_ex); -FASTOP1SRC2EX(idiv, idiv_ex); - -FASTOP3WCL(shld); -FASTOP3WCL(shrd); - -FASTOP2W(imul); - -FASTOP1(not); -FASTOP1(neg); -FASTOP1(inc); -FASTOP1(dec); - -FASTOP2CL(rol); -FASTOP2CL(ror); -FASTOP2CL(rcl); -FASTOP2CL(rcr); -FASTOP2CL(shl); -FASTOP2CL(shr); -FASTOP2CL(sar); - -FASTOP2W(bsf); -FASTOP2W(bsr); -FASTOP2W(bt); -FASTOP2W(bts); -FASTOP2W(btr); -FASTOP2W(btc); - -FASTOP2(xadd); - -FASTOP2R(cmp, cmp_r); +EM_ASM_2(add); +EM_ASM_2(or); +EM_ASM_2(adc); +EM_ASM_2(sbb); +EM_ASM_2(and); +EM_ASM_2(sub); +EM_ASM_2(xor); +EM_ASM_2(cmp); +EM_ASM_2(test); +EM_ASM_2(xadd); + +EM_ASM_1SRC2(mul, mul_ex); +EM_ASM_1SRC2(imul, imul_ex); +EM_ASM_1SRC2EX(div, div_ex); +EM_ASM_1SRC2EX(idiv, idiv_ex); + +EM_ASM_3WCL(shld); +EM_ASM_3WCL(shrd); + +EM_ASM_2W(imul); + +EM_ASM_1(not); +EM_ASM_1(neg); +EM_ASM_1(inc); +EM_ASM_1(dec); + +EM_ASM_2CL(rol); +EM_ASM_2CL(ror); +EM_ASM_2CL(rcl); +EM_ASM_2CL(rcr); +EM_ASM_2CL(shl); +EM_ASM_2CL(shr); +EM_ASM_2CL(sar); + +EM_ASM_2W(bsf); +EM_ASM_2W(bsr); +EM_ASM_2W(bt); +EM_ASM_2W(bts); +EM_ASM_2W(btr); +EM_ASM_2W(btc); + +EM_ASM_2R(cmp, cmp_r); static int em_bsf_c(struct x86_emulate_ctxt *ctxt) { /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsf); + return em_bsf(ctxt); } static int em_bsr_c(struct x86_emulate_ctxt *ctxt) @@ -1049,18 +1008,12 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsr); + return em_bsr(ctxt); } static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { - u8 rc; - void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); - - flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; " CALL_NOSPEC - : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); - return rc; + return __emulate_cc(flags, condition & 0xf); } static void fetch_register_operand(struct operand *op) @@ -1079,108 +1032,7 @@ static void fetch_register_operand(struct operand *op) op->val = *(u64 *)op->addr.reg; break; } -} - -static void emulator_get_fpu(void) -{ - fpregs_lock(); - - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); -} - -static void emulator_put_fpu(void) -{ - fpregs_unlock(); -} - -static void read_sse_reg(sse128_t *data, int reg) -{ - emulator_get_fpu(); - switch (reg) { - case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; - case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; - case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break; - case 3: asm("movdqa %%xmm3, %0" : 
"=m"(*data)); break; - case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break; - case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break; - case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break; - case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break; -#ifdef CONFIG_X86_64 - case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break; - case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break; - case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break; - case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break; - case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break; - case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break; - case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break; - case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break; -#endif - default: BUG(); - } - emulator_put_fpu(); -} - -static void write_sse_reg(sse128_t *data, int reg) -{ - emulator_get_fpu(); - switch (reg) { - case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; - case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; - case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break; - case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break; - case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break; - case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break; - case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break; - case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break; -#ifdef CONFIG_X86_64 - case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break; - case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break; - case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break; - case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break; - case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break; - case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break; - case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break; - case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break; -#endif - default: BUG(); - } - emulator_put_fpu(); -} - -static void read_mmx_reg(u64 *data, int reg) -{ - emulator_get_fpu(); - switch (reg) { - case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; - case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; - case 2: asm("movq %%mm2, %0" : "=m"(*data)); break; - case 3: asm("movq %%mm3, %0" : "=m"(*data)); break; - case 4: asm("movq %%mm4, %0" : "=m"(*data)); break; - case 5: asm("movq %%mm5, %0" : "=m"(*data)); break; - case 6: asm("movq %%mm6, %0" : "=m"(*data)); break; - case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; - default: BUG(); - } - emulator_put_fpu(); -} - -static void write_mmx_reg(u64 *data, int reg) -{ - emulator_get_fpu(); - switch (reg) { - case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; - case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; - case 2: asm("movq %0, %%mm2" : : "m"(*data)); break; - case 3: asm("movq %0, %%mm3" : : "m"(*data)); break; - case 4: asm("movq %0, %%mm4" : : "m"(*data)); break; - case 5: asm("movq %0, %%mm5" : : "m"(*data)); break; - case 6: asm("movq %0, %%mm6" : : "m"(*data)); break; - case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; - default: BUG(); - } - emulator_put_fpu(); + op->orig_val = op->val; } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1188,9 +1040,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - emulator_get_fpu(); + kvm_fpu_get(); asm volatile("fninit"); - emulator_put_fpu(); + kvm_fpu_put(); return X86EMUL_CONTINUE; } @@ -1201,9 +1053,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) 
return emulate_nm(ctxt); - emulator_get_fpu(); + kvm_fpu_get(); asm volatile("fnstcw %0": "+m"(fcw)); - emulator_put_fpu(); + kvm_fpu_put(); ctxt->dst.val = fcw; @@ -1217,28 +1069,30 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - emulator_get_fpu(); + kvm_fpu_get(); asm volatile("fnstsw %0": "+m"(fsw)); - emulator_put_fpu(); + kvm_fpu_put(); ctxt->dst.val = fsw; return X86EMUL_CONTINUE; } -static void decode_register_operand(struct x86_emulate_ctxt *ctxt, - struct operand *op) +static void __decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, int reg) { - unsigned reg = ctxt->modrm_reg; - - if (!(ctxt->d & ModRM)) - reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); - - if (ctxt->d & Sse) { + if ((ctxt->d & Avx) && ctxt->op_bytes == 32) { + op->type = OP_YMM; + op->bytes = 32; + op->addr.xmm = reg; + kvm_read_avx_reg(reg, &op->vec_val2); + return; + } + if (ctxt->d & (Avx|Sse)) { op->type = OP_XMM; op->bytes = 16; op->addr.xmm = reg; - read_sse_reg(&op->vec_val, reg); + kvm_read_sse_reg(reg, &op->vec_val); return; } if (ctxt->d & Mmx) { @@ -1252,9 +1106,20 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp); - fetch_register_operand(op); - op->orig_val = op->val; +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op) +{ + unsigned int reg; + + if (ctxt->d & ModRM) + reg = ctxt->modrm_reg; + else + reg = (ctxt->b & 7) | (ctxt->rex_bits & REX_B ? 8 : 0); + + __decode_register_operand(ctxt, op, reg); } static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg) @@ -1271,9 +1136,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, int rc = X86EMUL_CONTINUE; ulong modrm_ea = 0; - ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */ - index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */ - base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */ + ctxt->modrm_reg = (ctxt->rex_bits & REX_R ? 8 : 0); + index_reg = (ctxt->rex_bits & REX_X ? 8 : 0); + base_reg = (ctxt->rex_bits & REX_B ? 8 : 0); ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6; ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; @@ -1281,24 +1146,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, ctxt->modrm_seg = VCPU_SREG_DS; if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) { - op->type = OP_REG; - op->bytes = (ctxt->d & ByteOp) ? 
1 : ctxt->op_bytes; - op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, - ctxt->d & ByteOp); - if (ctxt->d & Sse) { - op->type = OP_XMM; - op->bytes = 16; - op->addr.xmm = ctxt->modrm_rm; - read_sse_reg(&op->vec_val, ctxt->modrm_rm); - return rc; - } - if (ctxt->d & Mmx) { - op->type = OP_MM; - op->bytes = 8; - op->addr.mm = ctxt->modrm_rm & 7; - return rc; - } - fetch_register_operand(op); + __decode_register_operand(ctxt, op, ctxt->modrm_rm); return rc; } @@ -1452,7 +1300,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - WARN_ON((mc->end + size) >= sizeof(mc->data)); + if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt)) + return X86EMUL_UNHANDLEABLE; rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size, &ctxt->exception); @@ -1638,6 +1487,37 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return linear_write_system(ctxt, addr, desc, sizeof(*desc)); } +static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl) +{ + const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET; + u64 efer = 0, cet = 0, ssp = 0; + + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer)) + return true; + + /* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */ + if (!(efer & EFER_LMA)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet)) + return true; + + if (!(cet & CET_SHSTK_EN)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp)) + return true; + + /* + * On transfer from 64-bit mode to compatibility mode, SSP[63:32] must + * be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode. + */ + return ssp >> 32; +} + static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1718,34 +1598,57 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, goto exception; } - if (!seg_desc.p) { - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; - goto exception; - } - dpl = seg_desc.dpl; switch (seg) { case VCPU_SREG_SS: /* * segment is not a writable data segment or segment - * selector's RPL != CPL or segment selector's RPL != CPL + * selector's RPL != CPL or DPL != CPL */ if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) goto exception; break; case VCPU_SREG_CS: + /* + * KVM uses "none" when loading CS as part of emulating Real + * Mode exceptions and IRET (handled above). In all other + * cases, loading CS without a control transfer is a KVM bug. + */ + if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE)) + goto exception; + if (!(seg_desc.type & 8)) goto exception; - if (seg_desc.type & 4) { - /* conforming */ - if (dpl > cpl) - goto exception; - } else { - /* nonconforming */ - if (rpl > cpl || dpl != cpl) + if (transfer == X86_TRANSFER_RET) { + /* RET can never return to an inner privilege level. 
*/ + if (rpl < cpl) goto exception; + /* Outer-privilege level return is not implemented */ + if (rpl > cpl) + return X86EMUL_UNHANDLEABLE; + } + if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) { + if (seg_desc.type & 4) { + /* conforming */ + if (dpl > rpl) + goto exception; + } else { + /* nonconforming */ + if (dpl != rpl) + goto exception; + } + } else { /* X86_TRANSFER_CALL_JMP */ + if (seg_desc.type & 4) { + /* conforming */ + if (dpl > cpl) + goto exception; + } else { + /* nonconforming */ + if (rpl > cpl || dpl != cpl) + goto exception; + } } /* in long-mode d/b must be clear if l is set */ if (seg_desc.d && seg_desc.l) { @@ -1755,6 +1658,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (efer & EFER_LMA) goto exception; } + if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) { + err_code = 0; + goto exception; + } /* CS(RPL) <- CPL */ selector = (selector & 0xfffc) | cpl; @@ -1762,12 +1669,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_TR: if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) goto exception; - old_desc = seg_desc; - seg_desc.type |= 2; /* busy */ - ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, - sizeof(seg_desc), &ctxt->exception); - if (ret != X86EMUL_CONTINUE) - return ret; break; case VCPU_SREG_LDTR: if (seg_desc.s || seg_desc.type != 2) @@ -1777,15 +1678,20 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, /* * segment is not a data or readable code segment or * ((segment is a data or nonconforming code segment) - * and (both RPL and CPL > DPL)) + * and ((RPL > DPL) or (CPL > DPL))) */ if ((seg_desc.type & 0xa) == 0x8 || (((seg_desc.type & 0xc) != 0xc) && - (rpl > dpl && cpl > dpl))) + (rpl > dpl || cpl > dpl))) goto exception; break; } + if (!seg_desc.p) { + err_vec = (seg == VCPU_SREG_SS) ? 
SS_VECTOR : NP_VECTOR; + goto exception; + } + if (seg_desc.s) { /* mark segment as accessed */ if (!(seg_desc.type & 1)) { @@ -1800,8 +1706,18 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32), ctxt)) - return emulate_gp(ctxt, 0); + ((u64)base3 << 32), ctxt, + X86EMUL_F_DT_LOAD)) + return emulate_gp(ctxt, err_code); + } + + if (seg == VCPU_SREG_TR) { + old_desc = seg_desc; + seg_desc.type |= 2; /* busy */ + ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, + sizeof(seg_desc), &ctxt->exception); + if (ret != X86EMUL_CONTINUE) + return ret; } load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); @@ -1858,18 +1774,24 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) op->addr.mem, &op->val, op->bytes); - break; case OP_MEM_STR: return segmented_write(ctxt, op->addr.mem, op->data, op->bytes * op->count); - break; case OP_XMM: - write_sse_reg(&op->vec_val, op->addr.xmm); + if (!(ctxt->d & Avx)) { + kvm_write_sse_reg(op->addr.xmm, &op->vec_val); + break; + } + /* full YMM write but with high bytes cleared */ + memset(op->valptr + 16, 0, 16); + fallthrough; + case OP_YMM: + kvm_write_avx_reg(op->addr.xmm, &op->vec_val2); break; case OP_MM: - write_mmx_reg(&op->mm_val, op->addr.mm); + kvm_write_mmx_reg(op->addr.mm, &op->mm_val); break; case OP_NONE: /* no writeback */ @@ -1880,22 +1802,22 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) return X86EMUL_CONTINUE; } -static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) +static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len) { struct segmented_address addr; - rsp_increment(ctxt, -bytes); + rsp_increment(ctxt, -len); addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; - return segmented_write(ctxt, addr, data, bytes); + return segmented_write(ctxt, addr, data, len); } static int em_push(struct x86_emulate_ctxt *ctxt) { /* Disable writeback. 
*/ ctxt->dst.type = OP_NONE; - return push(ctxt, &ctxt->src.val, ctxt->op_bytes); + return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, @@ -1923,7 +1845,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, void *dest, int len) { int rc; - unsigned long val, change_mask; + unsigned long val = 0; + unsigned long change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; int cpl = ctxt->ops->cpl(ctxt); @@ -1980,7 +1903,7 @@ static int em_enter(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; rbp = reg_read(ctxt, VCPU_REGS_RBP); - rc = push(ctxt, &rbp, stack_size(ctxt)); + rc = emulate_push(ctxt, &rbp, stack_size(ctxt)); if (rc != X86EMUL_CONTINUE) return rc; assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP), @@ -2014,14 +1937,14 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt) static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) { int seg = ctxt->src2.val; - unsigned long selector; + unsigned long selector = 0; int rc; rc = emulate_pop(ctxt, &selector, 2); if (rc != X86EMUL_CONTINUE) return rc; - if (ctxt->modrm_reg == VCPU_SREG_SS) + if (seg == VCPU_SREG_SS) ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; if (ctxt->op_bytes > 2) rsp_increment(ctxt, ctxt->op_bytes - 2); @@ -2060,7 +1983,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RDI; - u32 val; + u32 val = 0; while (reg >= VCPU_REGS_RAX) { if (reg == VCPU_REGS_RSP) { @@ -2238,7 +2161,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); + rc = assign_eip_far(ctxt, ctxt->src.val); /* Error handling is not implemented. */ if (rc != X86EMUL_CONTINUE) return X86EMUL_UNHANDLEABLE; @@ -2289,7 +2212,7 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) static int em_ret(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned long eip; + unsigned long eip = 0; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2301,7 +2224,8 @@ static int em_ret(struct x86_emulate_ctxt *ctxt) static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned long eip, cs; + unsigned long eip = 0; + unsigned long cs = 0; int cpl = ctxt->ops->cpl(ctxt); struct desc_struct new_desc; @@ -2311,15 +2235,12 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - /* Outer-privilege level return is not implemented */ - if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) - return X86EMUL_UNHANDLEABLE; rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, X86_TRANSFER_RET, &new_desc); if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, eip, &new_desc); + rc = assign_eip_far(ctxt, eip); /* Error handling is not implemented. 
*/ if (rc != X86EMUL_CONTINUE) return X86EMUL_UNHANDLEABLE; @@ -2345,7 +2266,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; ctxt->src.val = ctxt->dst.orig_val; - fastop(ctxt, em_cmp); + em_cmp(ctxt); if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ @@ -2378,321 +2299,19 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) return rc; } -static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) -{ -#ifdef CONFIG_X86_64 - return ctxt->ops->guest_has_long_mode(ctxt); -#else - return false; -#endif -} - -static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) -{ - desc->g = (flags >> 23) & 1; - desc->d = (flags >> 22) & 1; - desc->l = (flags >> 21) & 1; - desc->avl = (flags >> 20) & 1; - desc->p = (flags >> 15) & 1; - desc->dpl = (flags >> 13) & 3; - desc->s = (flags >> 12) & 1; - desc->type = (flags >> 8) & 15; -} - -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, - int n) -{ - struct desc_struct desc; - int offset; - u16 selector; - - selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); - - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); - return X86EMUL_CONTINUE; -} - -#ifdef CONFIG_X86_64 -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, - int n) -{ - struct desc_struct desc; - int offset; - u16 selector; - u32 base3; - - offset = 0x7e00 + n * 16; - - selector = GET_SMSTATE(u16, smstate, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - base3 = GET_SMSTATE(u32, smstate, offset + 12); - - ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); - return X86EMUL_CONTINUE; -} -#endif - -static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr3, u64 cr4) -{ - int bad; - u64 pcid; - - /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ - pcid = 0; - if (cr4 & X86_CR4_PCIDE) { - pcid = cr3 & 0xfff; - cr3 &= ~0xfff; - } - - bad = ctxt->ops->set_cr(ctxt, 3, cr3); - if (bad) - return X86EMUL_UNHANDLEABLE; - - /* - * First enable PAE, long mode needs it before CR0.PG = 1 is set. - * Then enable protected mode. However, PCID cannot be enabled - * if EFER.LMA=0, so set it separately. 
- */ - bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - if (bad) - return X86EMUL_UNHANDLEABLE; - - bad = ctxt->ops->set_cr(ctxt, 0, cr0); - if (bad) - return X86EMUL_UNHANDLEABLE; - - if (cr4 & X86_CR4_PCIDE) { - bad = ctxt->ops->set_cr(ctxt, 4, cr4); - if (bad) - return X86EMUL_UNHANDLEABLE; - if (pcid) { - bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); - if (bad) - return X86EMUL_UNHANDLEABLE; - } - - } - - return X86EMUL_CONTINUE; -} - -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - struct desc_struct desc; - struct desc_ptr dt; - u16 selector; - u32 val, cr0, cr3, cr4; - int i; - - cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); - cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); - - for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); - - val = GET_SMSTATE(u32, smstate, 0x7fcc); - ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smstate, 0x7fc8); - ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - - selector = GET_SMSTATE(u32, smstate, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); - - selector = GET_SMSTATE(u32, smstate, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f74); - dt.size = GET_SMSTATE(u32, smstate, 0x7f70); - ctxt->ops->set_gdt(ctxt, &dt); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f58); - dt.size = GET_SMSTATE(u32, smstate, 0x7f54); - ctxt->ops->set_idt(ctxt, &dt); - - for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } - - cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); - - return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); -} - -#ifdef CONFIG_X86_64 -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - struct desc_struct desc; - struct desc_ptr dt; - u64 val, cr0, cr3, cr4; - u32 base3; - u16 selector; - int i, r; - - for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); - - ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - - val = GET_SMSTATE(u32, smstate, 0x7f68); - ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smstate, 0x7f60); - ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - - cr0 = GET_SMSTATE(u64, smstate, 0x7f58); - cr3 = GET_SMSTATE(u64, smstate, 0x7f50); - cr4 = GET_SMSTATE(u64, smstate, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); - val = GET_SMSTATE(u64, smstate, 0x7ed0); - ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); - - selector = GET_SMSTATE(u32, smstate, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); - base3 = GET_SMSTATE(u32, 
smstate, 0x7e9c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e84); - dt.address = GET_SMSTATE(u64, smstate, 0x7e88); - ctxt->ops->set_idt(ctxt, &dt); - - selector = GET_SMSTATE(u32, smstate, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); - base3 = GET_SMSTATE(u32, smstate, 0x7e7c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e64); - dt.address = GET_SMSTATE(u64, smstate, 0x7e68); - ctxt->ops->set_gdt(ctxt, &dt); - - r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); - if (r != X86EMUL_CONTINUE) - return r; - - for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } - - return X86EMUL_CONTINUE; -} -#endif - static int em_rsm(struct x86_emulate_ctxt *ctxt) { - unsigned long cr0, cr4, efer; - char buf[512]; - u64 smbase; - int ret; - - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) + if (!ctxt->ops->is_smm(ctxt)) return emulate_ud(ctxt); - smbase = ctxt->ops->get_smbase(ctxt); - - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); - if (ret != X86EMUL_CONTINUE) - return X86EMUL_UNHANDLEABLE; - - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); - - ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & - ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); - - /* - * Get back to real mode, to prepare a safe state in which to load - * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU - * supports long mode. - */ - if (emulator_has_longmode(ctxt)) { - struct desc_struct cs_desc; - - /* Zero CR4.PCIDE before CR0.PG. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_PCIDE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - - /* A 32-bit code segment is required to clear EFER.LMA. */ - memset(&cs_desc, 0, sizeof(cs_desc)); - cs_desc.type = 0xb; - cs_desc.s = cs_desc.g = cs_desc.p = 1; - ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); - } - - /* For the 64-bit case, this will clear EFER.LMA. */ - cr0 = ctxt->ops->get_cr(ctxt, 0); - if (cr0 & X86_CR0_PE) - ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - - if (emulator_has_longmode(ctxt)) { - /* Clear CR4.PAE before clearing EFER.LME. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); - } - - /* - * Give pre_leave_smm() a chance to make ISA-specific changes to the - * vCPU state (e.g. enter guest mode) before loading state from the SMM - * state-save area. 
- */ - if (ctxt->ops->pre_leave_smm(ctxt, buf)) - return X86EMUL_UNHANDLEABLE; - -#ifdef CONFIG_X86_64 - if (emulator_has_longmode(ctxt)) - ret = rsm_load_state_64(ctxt, buf); - else -#endif - ret = rsm_load_state_32(ctxt, buf); - - if (ret != X86EMUL_CONTINUE) { - /* FIXME: should triple fault */ - return X86EMUL_UNHANDLEABLE; - } - - ctxt->ops->post_leave_smm(ctxt); + if (ctxt->ops->leave_smm(ctxt)) + ctxt->ops->triple_fault(ctxt); - return X86EMUL_CONTINUE; + return emulator_recalc_and_set_mode(ctxt); } static void -setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, - struct desc_struct *cs, struct desc_struct *ss) +setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss) { cs->l = 0; /* will be adjusted later */ set_desc_base(cs, 0); /* flat segment */ @@ -2717,50 +2336,6 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, ss->avl = 0; } -static bool vendor_intel(struct x86_emulate_ctxt *ctxt) -{ - u32 eax, ebx, ecx, edx; - - eax = ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); - return is_guest_vendor_intel(ebx, ecx, edx); -} - -static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) -{ - const struct x86_emulate_ops *ops = ctxt->ops; - u32 eax, ebx, ecx, edx; - - /* - * syscall should always be enabled in longmode - so only become - * vendor specific (cpuid) if other modes are active... - */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - return true; - - eax = 0x00000000; - ecx = 0x00000000; - ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); - /* - * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a - * 64bit guest with a 32bit compat-app running will #UD !! While this - * behaviour can be fixed (by emulating) into AMD response - CPUs of - * AMD can't behave like Intel. - */ - if (is_guest_vendor_intel(ebx, ecx, edx)) - return false; - - if (is_guest_vendor_amd(ebx, ecx, edx) || - is_guest_vendor_hygon(ebx, ecx, edx)) - return true; - - /* - * default: (not Intel, not AMD, not Hygon), apply Intel's - * stricter rules... - */ - return false; -} - static int em_syscall(struct x86_emulate_ctxt *ctxt) { const struct x86_emulate_ops *ops = ctxt->ops; @@ -2774,14 +2349,22 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); - if (!(em_syscall_is_enabled(ctxt))) + /* + * Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas + * AMD allows SYSCALL in any flavor of protected mode. Note, it's + * infeasible to emulate Intel behavior when running on AMD hardware, + * as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD + * for KVM to trap-and-emulate, unlike emulating AMD on Intel. + */ + if (ctxt->mode != X86EMUL_MODE_PROT64 && + ctxt->ops->guest_cpuid_is_intel_compatible(ctxt)) return emulate_ud(ctxt); ops->get_msr(ctxt, MSR_EFER, &efer); if (!(efer & EFER_SCE)) return emulate_ud(ctxt); - setup_syscalls_segments(ctxt, &cs, &ss); + setup_syscalls_segments(&cs, &ss); ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); @@ -2834,11 +2417,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) return emulate_gp(ctxt, 0); /* - * Not recognized on AMD in compat mode (but is recognized in legacy - * mode). + * Intel's architecture allows SYSENTER in compatibility mode, but AMD + * does not. Note, AMD does allow SYSENTER in legacy protected mode. 
*/ - if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) - && !vendor_intel(ctxt)) + if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) && + !ctxt->ops->guest_cpuid_is_intel_compatible(ctxt)) return emulate_ud(ctxt); /* sysenter/sysexit have not been tested in 64bit mode. */ @@ -2849,7 +2432,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); - setup_syscalls_segments(ctxt, &cs, &ss); + setup_syscalls_segments(&cs, &ss); ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; ss_sel = cs_sel + 8; @@ -2867,6 +2450,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data : (u32)msr_data; + if (efer & EFER_LMA) + ctxt->mode = X86EMUL_MODE_PROT64; return X86EMUL_CONTINUE; } @@ -2884,9 +2469,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ctxt->mode == X86EMUL_MODE_VM86) return emulate_gp(ctxt, 0); - setup_syscalls_segments(ctxt, &cs, &ss); + setup_syscalls_segments(&cs, &ss); - if ((ctxt->rex_prefix & 0x8) != 0x0) + if (ctxt->rex_bits & REX_W) usermode = X86EMUL_MODE_PROT64; else usermode = X86EMUL_MODE_PROT32; @@ -2913,8 +2498,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (emul_is_noncanonical_address(rcx, ctxt) || - emul_is_noncanonical_address(rdx, ctxt)) + if (emul_is_noncanonical_address(rcx, ctxt, 0) || + emul_is_noncanonical_address(rdx, ctxt, 0)) return emulate_gp(ctxt, 0); break; } @@ -2925,6 +2510,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ctxt->_eip = rdx; + ctxt->mode = usermode; *reg_write(ctxt, VCPU_REGS_RSP) = rcx; return X86EMUL_CONTINUE; @@ -2985,8 +2571,8 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, return true; } -static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, - u16 port, u16 len) +static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt, + u16 port, u16 len) { if (ctxt->perm_ok) return true; @@ -3007,7 +2593,14 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) * manner when ECX is zero due to REP-string optimizations. */ #ifdef CONFIG_X86_64 - if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt)) + u32 eax, ebx, ecx, edx; + + if (ctxt->ad_bytes != 4) + return; + + eax = ecx = 0; + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); + if (!is_guest_vendor_intel(ebx, ecx, edx)) return; *reg_write(ctxt, VCPU_REGS_RCX) = 0; @@ -3016,7 +2609,7 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) case 0xa4: /* movsb */ case 0xa5: /* movsd/w */ *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1; - /* fall through */ + fallthrough; case 0xaa: /* stosb */ case 0xab: /* stosd/w */ *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1; @@ -3102,8 +2695,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int task_switch_16(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, u16 old_tss_sel, +static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { struct tss_segment_16 tss_seg; @@ -3208,7 +2800,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, } /* - * Now load segment descriptors. If fault happenes at this stage + * Now load segment descriptors. 
If fault happens at this stage * it is handled in a context of new task */ ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, @@ -3241,8 +2833,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, return ret; } -static int task_switch_32(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, u16 old_tss_sel, +static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { struct tss_segment_32 tss_seg; @@ -3350,10 +2941,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, old_tss_sel = 0xffff; if (next_tss_desc.type & 8) - ret = task_switch_32(ctxt, tss_selector, old_tss_sel, - old_tss_base, &next_tss_desc); + ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc); else - ret = task_switch_16(ctxt, tss_selector, old_tss_sel, + ret = task_switch_16(ctxt, old_tss_sel, old_tss_base, &next_tss_desc); if (ret != X86EMUL_CONTINUE) return ret; @@ -3376,7 +2966,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, ret = em_push(ctxt); } - ops->get_dr(ctxt, 7, &dr7); + dr7 = ops->get_dr(ctxt, 7); ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN)); return ret; @@ -3441,7 +3031,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); if (cf) ctxt->eflags |= X86_EFLAGS_CF; @@ -3467,7 +3057,7 @@ static int em_aam(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3485,7 +3075,7 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3521,7 +3111,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); + rc = assign_eip_far(ctxt, ctxt->src.val); if (rc != X86EMUL_CONTINUE) goto fail; @@ -3549,7 +3139,7 @@ fail: static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned long eip; + unsigned long eip = 0; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -3576,7 +3166,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) static int em_imul_3op(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = ctxt->src2.val; - return fastop(ctxt, em_imul); + return em_imul(ctxt); } static int em_cwd(struct x86_emulate_ctxt *ctxt) @@ -3593,8 +3183,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) { u64 tsc_aux = 0; - if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) - return emulate_gp(ctxt, 0); + if (!ctxt->ops->guest_has_rdpid(ctxt)) + return emulate_ud(ctxt); + + ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux); ctxt->dst.val = tsc_aux; return X86EMUL_CONTINUE; } @@ -3661,11 +3253,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) static int em_cr_write(struct x86_emulate_ctxt *ctxt) { - if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) + int cr_num = ctxt->modrm_reg; + int r; + + if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val)) return emulate_gp(ctxt, 0); /* Disable writeback. */ ctxt->dst.type = OP_NONE; + + if (cr_num == 0) { + /* + * CR0 write might have updated CR0.PE and/or CR0.PG + * which can affect the cpu's execution mode. 
+ */ + r = emulator_recalc_and_set_mode(ctxt); + if (r != X86EMUL_CONTINUE) + return r; + } + return X86EMUL_CONTINUE; } @@ -3689,26 +3295,36 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt) static int em_wrmsr(struct x86_emulate_ctxt *ctxt) { + u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX); u64 msr_data; + int r; msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX) | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); - if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data)) + r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data); + + if (r == X86EMUL_PROPAGATE_FAULT) return emulate_gp(ctxt, 0); - return X86EMUL_CONTINUE; + return r; } static int em_rdmsr(struct x86_emulate_ctxt *ctxt) { + u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX); u64 msr_data; + int r; - if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data)) + r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data); + + if (r == X86EMUL_PROPAGATE_FAULT) return emulate_gp(ctxt, 0); - *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; - *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; - return X86EMUL_CONTINUE; + if (r == X86EMUL_CONTINUE) { + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; + *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; + } + return r; } static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment) @@ -3779,8 +3395,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) { int rc; ulong linear; + unsigned int max_size; - rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear); + rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode, + &linear, X86EMUL_F_INVLPG); if (rc == X86EMUL_CONTINUE) ctxt->ops->invlpg(ctxt, linear); /* Disable writeback. */ @@ -3858,7 +3476,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - emul_is_noncanonical_address(desc_ptr.address, ctxt)) + emul_is_noncanonical_address(desc_ptr.address, ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); @@ -4020,6 +3639,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4090,11 +3715,11 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - emulator_get_fpu(); + kvm_fpu_get(); rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); - emulator_put_fpu(); + kvm_fpu_put(); if (rc != X86EMUL_CONTINUE) return rc; @@ -4138,7 +3763,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - emulator_get_fpu(); + kvm_fpu_get(); if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); @@ -4155,7 +3780,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: - emulator_put_fpu(); + kvm_fpu_put(); return rc; } @@ -4164,6 +3789,9 @@ static int em_xsetbv(struct x86_emulate_ctxt *ctxt) { u32 eax, ecx, edx; + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)) + return emulate_ud(ctxt); + eax = reg_read(ctxt, VCPU_REGS_RAX); edx = reg_read(ctxt, VCPU_REGS_RDX); ecx = reg_read(ctxt, VCPU_REGS_RCX); @@ -4186,7 +3814,7 @@ static bool valid_cr(int nr) } } -static int check_cr_read(struct x86_emulate_ctxt *ctxt) +static int 
check_cr_access(struct x86_emulate_ctxt *ctxt) { if (!valid_cr(ctxt->modrm_reg)) return emulate_ud(ctxt); @@ -4194,90 +3822,6 @@ static int check_cr_read(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int check_cr_write(struct x86_emulate_ctxt *ctxt) -{ - u64 new_val = ctxt->src.val64; - int cr = ctxt->modrm_reg; - u64 efer = 0; - - static u64 cr_reserved_bits[] = { - 0xffffffff00000000ULL, - 0, 0, 0, /* CR3 checked later */ - CR4_RESERVED_BITS, - 0, 0, 0, - CR8_RESERVED_BITS, - }; - - if (!valid_cr(cr)) - return emulate_ud(ctxt); - - if (new_val & cr_reserved_bits[cr]) - return emulate_gp(ctxt, 0); - - switch (cr) { - case 0: { - u64 cr4; - if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || - ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) - return emulate_gp(ctxt, 0); - - cr4 = ctxt->ops->get_cr(ctxt, 4); - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - - if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && - !(cr4 & X86_CR4_PAE)) - return emulate_gp(ctxt, 0); - - break; - } - case 3: { - u64 rsvd = 0; - - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) { - u64 maxphyaddr; - u32 eax, ebx, ecx, edx; - - eax = 0x80000008; - ecx = 0; - if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, - &edx, true)) - maxphyaddr = eax & 0xff; - else - maxphyaddr = 36; - rsvd = rsvd_bits(maxphyaddr, 63); - if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE) - rsvd &= ~X86_CR3_PCID_NOFLUSH; - } - - if (new_val & rsvd) - return emulate_gp(ctxt, 0); - - break; - } - case 4: { - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - - if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) - return emulate_gp(ctxt, 0); - - break; - } - } - - return X86EMUL_CONTINUE; -} - -static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) -{ - unsigned long dr7; - - ctxt->ops->get_dr(ctxt, 7, &dr7); - - /* Check if DR7.Global_Enable is set */ - return dr7 & (1 << 13); -} - static int check_dr_read(struct x86_emulate_ctxt *ctxt) { int dr = ctxt->modrm_reg; @@ -4290,12 +3834,12 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); - if (check_dr7_gd(ctxt)) { + if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) { ulong dr6; - ctxt->ops->get_dr(ctxt, 6, &dr6); + dr6 = ctxt->ops->get_dr(ctxt, 6); dr6 &= ~DR_TRAP_BITS; - dr6 |= DR6_BD | DR6_RTM; + dr6 |= DR6_BD | DR6_ACTIVE_LOW; ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); } @@ -4342,7 +3886,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) u64 cr4 = ctxt->ops->get_cr(ctxt, 4); if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) - return emulate_ud(ctxt); + return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; } @@ -4359,8 +3903,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx)) return X86EMUL_CONTINUE; + /* + * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE + * check however is unnecessary because CPL is always 0 outside + * protected mode. 
+ */ if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || - ctxt->ops->check_pmc(ctxt, rcx)) + ctxt->ops->check_rdpmc_early(ctxt, rcx)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -4369,7 +3918,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) static int check_perm_in(struct x86_emulate_ctxt *ctxt) { ctxt->dst.bytes = min(ctxt->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes)) + if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -4378,7 +3927,7 @@ static int check_perm_in(struct x86_emulate_ctxt *ctxt) static int check_perm_out(struct x86_emulate_ctxt *ctxt) { ctxt->src.bytes = min(ctxt->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes)) + if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -4396,7 +3945,6 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } -#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ @@ -4411,9 +3959,11 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I2bvIP(_f, _e, _i, _p) \ IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ - F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ - F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) + +static const struct opcode ud = I(SrcNone, emulate_ud); static const struct opcode group7_rm0[] = { N, @@ -4451,14 +4001,14 @@ static const struct opcode group7_rm7[] = { }; static const struct opcode group1[] = { - F(Lock, em_add), - F(Lock | PageTable, em_or), - F(Lock, em_adc), - F(Lock, em_sbb), - F(Lock | PageTable, em_and), - F(Lock, em_sub), - F(Lock, em_xor), - F(NoWrite, em_cmp), + I(Lock, em_add), + I(Lock | PageTable, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock | PageTable, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -4466,40 +4016,40 @@ static const struct opcode group1A[] = { }; static const struct opcode group2[] = { - F(DstMem | ModRM, em_rol), - F(DstMem | ModRM, em_ror), - F(DstMem | ModRM, em_rcl), - F(DstMem | ModRM, em_rcr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_shr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_sar), + I(DstMem | ModRM, em_rol), + I(DstMem | ModRM, em_ror), + I(DstMem | ModRM, em_rcl), + I(DstMem | ModRM, em_rcr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_shr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_sar), }; static const struct opcode group3[] = { - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcNone | Lock, em_not), - F(DstMem | SrcNone | Lock, em_neg), - F(DstXacc | Src2Mem, em_mul_ex), - F(DstXacc | Src2Mem, em_imul_ex), - F(DstXacc | Src2Mem, em_div_ex), - F(DstXacc | Src2Mem, em_idiv_ex), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcNone | Lock, em_not), + 
I(DstMem | SrcNone | Lock, em_neg), + I(DstXacc | Src2Mem, em_mul_ex), + I(DstXacc | Src2Mem, em_imul_ex), + I(DstXacc | Src2Mem, em_div_ex), + I(DstXacc | Src2Mem, em_idiv_ex), }; static const struct opcode group4[] = { - F(ByteOp | DstMem | SrcNone | Lock, em_inc), - F(ByteOp | DstMem | SrcNone | Lock, em_dec), + I(ByteOp | DstMem | SrcNone | Lock, em_inc), + I(ByteOp | DstMem | SrcNone | Lock, em_dec), N, N, N, N, N, N, }; static const struct opcode group5[] = { - F(DstMem | SrcNone | Lock, em_inc), - F(DstMem | SrcNone | Lock, em_dec), - I(SrcMem | NearBranch, em_call_near_abs), - I(SrcMemFAddr | ImplicitOps, em_call_far), - I(SrcMem | NearBranch, em_jmp_abs), - I(SrcMemFAddr | ImplicitOps, em_jmp_far), + I(DstMem | SrcNone | Lock, em_inc), + I(DstMem | SrcNone | Lock, em_dec), + I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs), + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far), + I(SrcMem | NearBranch | IsBranch, em_jmp_abs), + I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), }; @@ -4531,10 +4081,10 @@ static const struct group_dual group7 = { { static const struct opcode group8[] = { N, N, N, N, - F(DstMem | SrcImmByte | NoWrite, em_bt), - F(DstMem | SrcImmByte | Lock | PageTable, em_bts), - F(DstMem | SrcImmByte | Lock, em_btr), - F(DstMem | SrcImmByte | Lock | PageTable, em_btc), + I(DstMem | SrcImmByte | NoWrite, em_bt), + I(DstMem | SrcImmByte | Lock | PageTable, em_bts), + I(DstMem | SrcImmByte | Lock, em_btr), + I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; /* @@ -4542,7 +4092,7 @@ static const struct opcode group8[] = { * from the register case of group9. */ static const struct gprefix pfx_0f_c7_7 = { - N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid), }; @@ -4559,7 +4109,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { @@ -4571,7 +4121,7 @@ static const struct group_dual group15 = { { } }; static const struct gprefix pfx_0f_6f_0f_7f = { - I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), + I(Mmx, em_mov), I(Sse | Avx | Aligned, em_mov), N, I(Sse | Avx | Unaligned, em_mov), }; static const struct instr_dual instr_dual_0f_2b = { @@ -4590,8 +4140,8 @@ static const struct gprefix pfx_0f_28_0f_29 = { I(Aligned, em_mov), I(Aligned, em_mov), N, N, }; -static const struct gprefix pfx_0f_e7 = { - N, I(Sse, em_mov), N, N, +static const struct gprefix pfx_0f_e7_0f_38_2a = { + N, I(Sse | Avx, em_mov), N, N, }; static const struct escape escape_d9 = { { @@ -4665,33 +4215,37 @@ static const struct mode_dual mode_dual_63 = { N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd) }; +static const struct instr_dual instr_dual_8d = { + D(DstReg | SrcMem | ModRM | NoAccess), N +}; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - F6ALU(Lock, em_add), + I6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), /* 0x08 - 0x0F */ - F6ALU(Lock | PageTable, em_or), + I6ALU(Lock | PageTable, em_or), I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), N, /* 0x10 - 0x17 */ - F6ALU(Lock, em_adc), + I6ALU(Lock, em_adc), I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), I(ImplicitOps | Stack | No64 
| Src2SS, em_pop_sreg), /* 0x18 - 0x1F */ - F6ALU(Lock, em_sbb), + I6ALU(Lock, em_sbb), I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), /* 0x20 - 0x27 */ - F6ALU(Lock | PageTable, em_and), N, N, + I6ALU(Lock | PageTable, em_and), N, N, /* 0x28 - 0x2F */ - F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - F6ALU(Lock, em_xor), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - F6ALU(NoWrite, em_cmp), N, N, + I6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ - X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), + X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ @@ -4709,26 +4263,26 @@ static const struct opcode opcode_table[256] = { I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ - X16(D(SrcImmByte | NearBranch)), + X16(D(SrcImmByte | NearBranch | IsBranch)), /* 0x80 - 0x87 */ G(ByteOp | DstMem | SrcImm, group1), G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg), - D(ModRM | SrcMem | NoAccess | DstReg), + ID(0, &instr_dual_8d), I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), G(0, group1A), /* 0x90 - 0x97 */ DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), - I(SrcImmFAddr | No64, em_call_far), N, + I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N, II(ImplicitOps | Stack, em_pushf, pushf), II(ImplicitOps | Stack, em_popf, popf), I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf), @@ -4736,47 +4290,50 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), + I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), /* 0xA8 - 0xAF */ - F2bv(DstAcc | SrcImm | NoWrite, em_test), + I2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm), - I(ImplicitOps | NearBranch, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ - I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), - I(ImplicitOps | SrcImmU16, em_ret_far_imm), - 
I(ImplicitOps, em_ret_far), - D(ImplicitOps), DI(SrcImmByte, intn), - D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), + I(Stack | SrcImmU16 | Src2ImmByte, em_enter), + I(Stack, em_leave), + I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm), + I(ImplicitOps | IsBranch | ShadowStack, em_ret_far), + D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn), + D(ImplicitOps | No64 | IsBranch), + II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret), /* 0xD0 - 0xD7 */ G(Src2One | ByteOp, group2), G(Src2One, group2), G(Src2CL | ByteOp, group2), G(Src2CL, group2), I(DstAcc | SrcImmUByte | No64, em_aam), I(DstAcc | SrcImmUByte | No64, em_aad), - F(DstAcc | ByteOp | No64, em_salc), + I(DstAcc | ByteOp | No64, em_salc), I(DstAcc | SrcXLat | ByteOp, em_mov), /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, /* 0xE0 - 0xE7 */ - X3(I(SrcImmByte | NearBranch, em_loop)), - I(SrcImmByte | NearBranch, em_jcxz), + X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)), + I(SrcImmByte | NearBranch | IsBranch, em_jcxz), I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), /* 0xE8 - 0xEF */ - I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch), - I(SrcImmFAddr | No64, em_jmp_far), - D(SrcImmByte | ImplicitOps | NearBranch), + I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call), + D(SrcImm | ImplicitOps | NearBranch | IsBranch), + I(SrcImmFAddr | No64 | IsBranch, em_jmp_far), + D(SrcImmByte | ImplicitOps | NearBranch | IsBranch), I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), /* 0xF0 - 0xF7 */ @@ -4792,13 +4349,13 @@ static const struct opcode opcode_table[256] = { static const struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ G(0, group6), GD(0, &group7), N, N, - N, I(ImplicitOps | EmulateOnUD, em_syscall), + N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall), II(ImplicitOps | Priv, em_clts, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, /* 0x10 - 0x1F */ - GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11), - GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11), + GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_10_0f_11), + GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_10_0f_11), N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */ D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, @@ -4807,24 +4364,24 @@ static const struct opcode twobyte_table[256] = { D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */ D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */ /* 0x20 - 0x2F */ - DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), + DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access), DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write, - check_cr_write), + check_cr_access), IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write, check_dr_write), N, N, N, N, - GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), - GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), - N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b), + GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_28_0f_29), + GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, 
&pfx_0f_28_0f_29), + N, GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_2b), N, N, N, N, /* 0x30 - 0x3F */ II(ImplicitOps | Priv, em_wrmsr, wrmsr), IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), II(ImplicitOps | Priv, em_rdmsr, rdmsr), IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), - I(ImplicitOps | EmulateOnUD, em_sysenter), - I(ImplicitOps | Priv | EmulateOnUD, em_sysexit), + I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter), + I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit), N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ @@ -4842,38 +4399,38 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ - X16(D(SrcImm | NearBranch)), + X16(D(SrcImm | NearBranch | IsBranch)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), II(ImplicitOps, em_cpuid, cpuid), - F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), - F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, + I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), + I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), II(EmulateOnUD | ImplicitOps, em_rsm, rsm), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), - F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), - GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), + I(DstMem | SrcReg | Src2CL | ModRM, em_shrd), + GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), - F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), + I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xB8 - 0xBF */ N, N, G(BitOp, group8), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), I(DstReg | SrcMem | ModRM, em_bsf_c), I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ - F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), + I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ @@ -4881,7 +4438,7 @@ static const struct opcode twobyte_table[256] = { /* 0xD0 - 0xDF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0xE0 - 0xEF */ - N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7), + N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7_0f_38_2a), N, N, N, N, N, N, N, N, /* 0xF0 - 0xFF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N @@ -4896,11 +4453,11 @@ static const struct instr_dual instr_dual_0f_38_f1 = { }; static const struct gprefix three_byte_0f_38_f0 = { - ID(0, &instr_dual_0f_38_f0), N, N, N + ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N }; static const struct gprefix three_byte_0f_38_f1 = { - ID(0, 
&instr_dual_0f_38_f1), N, N, N + ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N }; /* @@ -4908,8 +4465,13 @@ static const struct gprefix three_byte_0f_38_f1 = { * byte. */ static const struct opcode opcode_map_0f_38[256] = { - /* 0x00 - 0x7f */ - X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), + /* 0x00 - 0x1f */ + X16(N), X16(N), + /* 0x20 - 0x2f */ + X8(N), + X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N, + /* 0x30 - 0x7f */ + X16(N), X16(N), X16(N), X16(N), X16(N), /* 0x80 - 0xef */ X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), /* 0xf0 - 0xf1 */ @@ -4935,6 +4497,60 @@ static const struct opcode opcode_map_0f_38[256] = { #undef I2bvIP #undef I6ALU +static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt) +{ + return ctxt->d & ShadowStack; +} + +static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt) +{ + u64 flags = ctxt->d; + + if (!(flags & IsBranch)) + return false; + + /* + * All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are + * indirect and thus affect IBT state. All far RETs (including SYSEXIT + * and IRET) are protected via Shadow Stacks and thus don't affect IBT + * state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is + * enabled, but that should be handled by IRET emulation (in the very + * unlikely scenario that KVM adds support for fully emulating IRET). + */ + if (!(flags & NearBranch)) + return ctxt->execute != em_iret && + ctxt->execute != em_ret_far && + ctxt->execute != em_ret_far_imm && + ctxt->execute != em_sysexit; + + switch (flags & SrcMask) { + case SrcReg: + case SrcMem: + case SrcMem16: + case SrcMem32: + return true; + case SrcMemFAddr: + case SrcImmFAddr: + /* Far branches should be handled above. */ + WARN_ON_ONCE(1); + return true; + case SrcNone: + case SrcImm: + case SrcImmByte: + /* + * Note, ImmU16 is used only for the stack adjustment operand on ENTER + * and RET instructions. ENTER isn't a branch and RET FAR is handled + * by the NearBranch check above. RET itself isn't an indirect branch. + */ + case SrcImmU16: + return false; + default: + WARN_ONCE(1, "Unexpected Src operand '%llx' on branch", + flags & SrcMask); + return false; + } +} + static unsigned imm_size(struct x86_emulate_ctxt *ctxt) { unsigned size; @@ -5014,14 +4630,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); fetch_register_operand(op); - op->orig_val = op->val; break; case OpAccLo: op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 
2 : ctxt->op_bytes; op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); fetch_register_operand(op); - op->orig_val = op->val; break; case OpAccHi: if (ctxt->d & ByteOp) { @@ -5032,7 +4646,6 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->bytes = ctxt->op_bytes; op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); fetch_register_operand(op); - op->orig_val = op->val; break; case OpDI: op->type = OP_MEM; @@ -5151,12 +4764,87 @@ done: return rc; } -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) +static int x86_decode_avx(struct x86_emulate_ctxt *ctxt, + u8 vex_1st, u8 vex_2nd, struct opcode *opcode) +{ + u8 vex_3rd, map, pp, l, v; + int rc = X86EMUL_CONTINUE; + + if (ctxt->rep_prefix || ctxt->op_prefix || ctxt->rex_prefix) + goto ud; + + if (vex_1st == 0xc5) { + /* Expand RVVVVlpp to VEX3 format */ + vex_3rd = vex_2nd & ~0x80; /* VVVVlpp from VEX2, w=0 */ + vex_2nd = (vex_2nd & 0x80) | 0x61; /* R from VEX2, X=1 B=1 mmmmm=00001 */ + } else { + vex_3rd = insn_fetch(u8, ctxt); + } + + /* vex_2nd = RXBmmmmm, vex_3rd = wVVVVlpp. Fix polarity */ + vex_2nd ^= 0xE0; /* binary 11100000 */ + vex_3rd ^= 0x78; /* binary 01111000 */ + + ctxt->rex_prefix = REX_PREFIX; + ctxt->rex_bits = (vex_2nd & 0xE0) >> 5; /* RXB */ + ctxt->rex_bits |= (vex_3rd & 0x80) >> 4; /* w */ + if (ctxt->rex_bits && ctxt->mode != X86EMUL_MODE_PROT64) + goto ud; + + map = vex_2nd & 0x1f; + v = (vex_3rd >> 3) & 0xf; + l = vex_3rd & 0x4; + pp = vex_3rd & 0x3; + + ctxt->b = insn_fetch(u8, ctxt); + switch (map) { + case 1: + ctxt->opcode_len = 2; + *opcode = twobyte_table[ctxt->b]; + break; + case 2: + ctxt->opcode_len = 3; + *opcode = opcode_map_0f_38[ctxt->b]; + break; + case 3: + /* no 0f 3a instructions are supported yet */ + return X86EMUL_UNHANDLEABLE; + default: + goto ud; + } + + /* + * No three operand instructions are supported yet; those that + * *are* marked with the Avx flag reserve the VVVV flag. + */ + if (v) + goto ud; + + if (l) + ctxt->op_bytes = 32; + else + ctxt->op_bytes = 16; + + switch (pp) { + case 0: break; + case 1: ctxt->op_prefix = true; break; + case 2: ctxt->rep_prefix = 0xf3; break; + case 3: ctxt->rep_prefix = 0xf2; break; + } + +done: + return rc; +ud: + *opcode = ud; + return rc; +} + +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type) { int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, goffset, simd_prefix; - bool op_prefix = false; + bool vex_prefix = false; bool has_seg_override = false; struct opcode opcode; u16 dummy; @@ -5208,7 +4896,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) for (;;) { switch (ctxt->b = insn_fetch(u8, ctxt)) { case 0x66: /* operand-size override */ - op_prefix = true; + ctxt->op_prefix = true; /* switch between 2/4 bytes */ ctxt->op_bytes = def_op_bytes ^ 6; break; @@ -5247,7 +4935,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) goto done_prefixes; - ctxt->rex_prefix = ctxt->b; + ctxt->rex_prefix = REX_PREFIX; + ctxt->rex_bits = ctxt->b & 0xf; continue; case 0xf0: /* LOCK */ ctxt->lock_prefix = 1; @@ -5261,20 +4950,33 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) } /* Any legacy prefix after a REX prefix nullifies its effect. */ - - ctxt->rex_prefix = 0; + ctxt->rex_prefix = REX_NONE; + ctxt->rex_bits = 0; } done_prefixes: /* REX prefix. 
*/ - if (ctxt->rex_prefix & 8) - ctxt->op_bytes = 8; /* REX.W */ + if (ctxt->rex_bits & REX_W) + ctxt->op_bytes = 8; /* Opcode byte(s). */ - opcode = opcode_table[ctxt->b]; - /* Two-byte opcode? */ - if (ctxt->b == 0x0f) { + if (ctxt->b == 0xc4 || ctxt->b == 0xc5) { + /* VEX or LDS/LES */ + u8 vex_2nd = insn_fetch(u8, ctxt); + if (mode != X86EMUL_MODE_PROT64 && (vex_2nd & 0xc0) != 0xc0) { + opcode = opcode_table[ctxt->b]; + ctxt->modrm = vex_2nd; + /* the Mod/RM byte has been fetched already! */ + goto done_modrm; + } + + vex_prefix = true; + rc = x86_decode_avx(ctxt, ctxt->b, vex_2nd, &opcode); + if (rc != X86EMUL_CONTINUE) + goto done; + } else if (ctxt->b == 0x0f) { + /* Two- or three-byte opcode */ ctxt->opcode_len = 2; ctxt->b = insn_fetch(u8, ctxt); opcode = twobyte_table[ctxt->b]; @@ -5285,18 +4987,16 @@ done_prefixes: ctxt->b = insn_fetch(u8, ctxt); opcode = opcode_map_0f_38[ctxt->b]; } + } else { + /* Opcode byte(s). */ + opcode = opcode_table[ctxt->b]; } - ctxt->d = opcode.flags; - if (ctxt->d & ModRM) + if (opcode.flags & ModRM) ctxt->modrm = insn_fetch(u8, ctxt); - /* vex-prefix instructions are not implemented */ - if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && - (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) { - ctxt->d = NotImpl; - } - +done_modrm: + ctxt->d = opcode.flags; while (ctxt->d & GroupMask) { switch (ctxt->d & GroupMask) { case Group: @@ -5315,9 +5015,9 @@ done_prefixes: opcode = opcode.u.group[goffset]; break; case Prefix: - if (ctxt->rep_prefix && op_prefix) + if (ctxt->rep_prefix && ctxt->op_prefix) return EMULATION_FAILED; - simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; + simd_prefix = ctxt->op_prefix ? 0x66 : ctxt->rep_prefix; switch (simd_prefix) { case 0x00: opcode = opcode.u.gprefix->pfx_no; break; case 0x66: opcode = opcode.u.gprefix->pfx_66; break; @@ -5356,13 +5056,63 @@ done_prefixes: ctxt->d |= opcode.flags; } + ctxt->is_branch = opcode.flags & IsBranch; + /* Unrecognised? */ if (ctxt->d == 0) return EMULATION_FAILED; + if (unlikely(vex_prefix)) { + /* + * Only specifically marked instructions support VEX. Since many + * instructions support it but are not annotated, return not implemented + * rather than #UD. + */ + if (!(ctxt->d & Avx)) + return EMULATION_FAILED; + + if (!(ctxt->d & AlignMask)) + ctxt->d |= Unaligned; + } + ctxt->execute = opcode.u.execute; - if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD))) + /* + * Reject emulation if KVM might need to emulate shadow stack updates + * and/or indirect branch tracking enforcement, which the emulator + * doesn't support. + */ + if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) && + ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) { + u64 u_cet = 0, s_cet = 0; + + /* + * Check both User and Supervisor on far transfers as inter- + * privilege level transfers are impacted by CET at the target + * privilege level, and that is not known at this time. The + * expectation is that the guest will not require emulation of + * any CET-affected instructions at any privilege level. 
+ */ + if (!(ctxt->d & NearBranch)) + u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN; + else if (ctxt->ops->cpl(ctxt) == 3) + u_cet = CET_SHSTK_EN | CET_ENDBR_EN; + else + s_cet = CET_SHSTK_EN | CET_ENDBR_EN; + + if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) || + (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))) + return EMULATION_FAILED; + + if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt)) + return EMULATION_FAILED; + + if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt)) + return EMULATION_FAILED; + } + + if (unlikely(emulation_type & EMULTYPE_TRAP_UD) && + likely(!(ctxt->d & EmulateOnUD))) return EMULATION_FAILED; if (unlikely(ctxt->d & @@ -5395,8 +5145,10 @@ done_prefixes: if ((ctxt->d & No16) && ctxt->op_bytes == 2) ctxt->op_bytes = 4; - if (ctxt->d & Sse) - ctxt->op_bytes = 16; + if (vex_prefix) + ; + else if (ctxt->d & Sse) + ctxt->op_bytes = 16, ctxt->d &= ~Avx; else if (ctxt->d & Mmx) ctxt->op_bytes = 8; } @@ -5476,9 +5228,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; - emulator_get_fpu(); + kvm_fpu_get(); rc = asm_safe("fwait"); - emulator_put_fpu(); + kvm_fpu_put(); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); @@ -5489,43 +5241,31 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) static void fetch_possible_mmx_operand(struct operand *op) { if (op->type == OP_MM) - read_mmx_reg(&op->mm_val, op->addr.mm); -} - -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) -{ - ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; - - if (!(ctxt->d & ByteOp)) - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - - asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" - : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT - : "c"(ctxt->src2.val)); - - ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); - if (!fop) /* exception is returned in fop variable */ - return emulate_de(ctxt); - return X86EMUL_CONTINUE; + kvm_read_mmx_reg(op->addr.mm, &op->mm_val); } void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->rip_relative, 0, - (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); + /* Clear fields that are set conditionally but read without a guard. 
*/ + ctxt->rip_relative = false; + ctxt->rex_prefix = REX_NONE; + ctxt->rex_bits = 0; + ctxt->lock_prefix = 0; + ctxt->op_prefix = false; + ctxt->rep_prefix = 0; + ctxt->regs_valid = 0; + ctxt->regs_dirty = 0; ctxt->io_read.pos = 0; ctxt->io_read.end = 0; ctxt->mem_read.end = 0; } -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) { const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; - unsigned emul_flags; ctxt->mem_read.pos = 0; @@ -5540,22 +5280,35 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - emul_flags = ctxt->ops->get_hflags(ctxt); if (unlikely(ctxt->d & - (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { + (No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || (ctxt->d & Undefined)) { rc = emulate_ud(ctxt); goto done; } - if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) - || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) { rc = emulate_ud(ctxt); goto done; } - if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + if (ctxt->d & Avx) { + u64 xcr = 0; + if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE) + || ops->get_xcr(ctxt, 0, &xcr) + || !(xcr & XFEATURE_MASK_YMM)) { + rc = emulate_ud(ctxt); + goto done; + } + } else if (ctxt->d & Sse) { + if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) { + rc = emulate_ud(ctxt); + goto done; + } + } + + if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { rc = emulate_nm(ctxt); goto done; } @@ -5574,7 +5327,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(&ctxt->dst); } - if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { + if (unlikely(check_intercepts) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5603,7 +5356,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5657,7 +5410,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) @@ -5670,10 +5423,7 @@ special_insn: ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { - if (ctxt->d & Fastop) - rc = fastop(ctxt, ctxt->fop); - else - rc = ctxt->execute(ctxt); + rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; goto writeback; @@ -5807,7 +5557,8 @@ writeback: done: if (rc == X86EMUL_PROPAGATE_FAULT) { - WARN_ON(ctxt->exception.vector > 0x1f); + if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt)) + return EMULATION_FAILED; ctxt->have_exception = true; } if (rc == X86EMUL_INTERCEPTED) @@ -5832,7 +5583,7 @@ twobyte_insn: ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg); break; case 0x21: /* mov from dr to reg */ - ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); + ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg); break; case 0x40 ... 
0x4f: /* cmov */ if (test_cc(ctxt->b, ctxt->eflags)) |
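/*
 * Illustrative, self-contained sketch (not code from the patch above): it
 * shows the VEX field layout that the new x86_decode_avx() path relies on
 * when it folds a 2-byte VEX prefix into the 3-byte form and then XORs away
 * the inverted R/X/B and vvvv bits.  struct vex_fields, decode_vex() and
 * main() are names invented for this example only.
 */
#include <stdint.h>
#include <stdio.h>

struct vex_fields {
	uint8_t rexbits;	/* B=bit0, X=bit1, R=bit2, W=bit3, as in REX */
	uint8_t map;		/* 1 = 0f, 2 = 0f 38, 3 = 0f 3a */
	uint8_t vvvv;		/* second source register, 0 if unused */
	uint8_t l;		/* 0 = 128-bit, 1 = 256-bit */
	uint8_t pp;		/* 0 = none, 1 = 66, 2 = f3, 3 = f2 */
};

/* p0 is 0xc4 or 0xc5; p1/p2 are the payload bytes (p2 unused for 0xc5). */
static struct vex_fields decode_vex(uint8_t p0, uint8_t p1, uint8_t p2)
{
	struct vex_fields f;

	if (p0 == 0xc5) {		/* expand VEX2 to the VEX3 layout */
		p2 = p1 & ~0x80;	/* vvvvLpp, W = 0 */
		p1 = (p1 & 0x80) | 0x61;/* keep R; set inverted X, B; map = 0f */
	}
	p1 ^= 0xE0;			/* R, X, B are stored inverted */
	p2 ^= 0x78;			/* so is vvvv */

	f.rexbits  = (p1 & 0xE0) >> 5;	/* RXB */
	f.rexbits |= (p2 & 0x80) >> 4;	/* W */
	f.map  = p1 & 0x1f;
	f.vvvv = (p2 >> 3) & 0xf;
	f.l    = (p2 >> 2) & 1;
	f.pp   = p2 & 0x3;
	return f;
}

int main(void)
{
	/* c5 fe 6f /r is VMOVDQU ymm, m256: map 0f, pp = f3, L = 1 */
	struct vex_fields f = decode_vex(0xc5, 0xfe, 0);

	printf("map=%u pp=%u l=%u vvvv=%u rexbits=%#x\n",
	       (unsigned)f.map, (unsigned)f.pp, (unsigned)f.l,
	       (unsigned)f.vvvv, (unsigned)f.rexbits);
	return 0;
}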

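/*
 * Stand-alone sketch (not the kernel's code) of the CET gate applied in the
 * decode path above: far transfers consult both MSR_IA32_U_CET and
 * MSR_IA32_S_CET because the target privilege level is not known yet, near
 * transfers consult only the MSR matching the current CPL, and emulation is
 * refused whenever shadow-stack updates or indirect-branch-tracking
 * enforcement would have to be emulated.  The structure, the function names
 * and the two enable-bit positions are assumptions made for this
 * illustration only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SHSTK_EN	(1ull << 0)	/* assumed CET_SHSTK_EN position */
#define ENDBR_EN	(1ull << 2)	/* assumed CET_ENDBR_EN position */

struct insn_info {
	bool near_branch;	/* near vs. far control transfer */
	bool shstk_insn;	/* pushes to / pops from the shadow stack */
	bool ibt_insn;		/* indirect branch tracked by IBT */
};

/* Return true if emulation of the instruction should be refused. */
static bool cet_blocks_emulation(const struct insn_info *insn, int cpl,
				 uint64_t u_cet, uint64_t s_cet)
{
	uint64_t cet;

	if (!insn->shstk_insn && !insn->ibt_insn)
		return false;

	if (!insn->near_branch)
		cet = u_cet | s_cet;		/* target CPL unknown */
	else
		cet = (cpl == 3) ? u_cet : s_cet;

	if ((cet & SHSTK_EN) && insn->shstk_insn)
		return true;

	return (cet & ENDBR_EN) && insn->ibt_insn;
}

int main(void)
{
	/* A near indirect CALL at CPL 3 with user IBT enabled is refused. */
	struct insn_info call = { .near_branch = true, .shstk_insn = true,
				  .ibt_insn = true };

	printf("blocked = %d\n", cet_blocks_emulation(&call, 3, ENDBR_EN, 0));
	return 0;
}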