author     Linus Torvalds <torvalds@linux-foundation.org>  2017-12-10 08:24:16 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-12-10 08:24:16 -0800
commit     c465fc11e579e3fb65a5b74fcfbf616e084bc05e (patch)
tree       89c872d9a2ba4abcc4504e4073b364dc1dcc4525 /arch/x86
parent     4ded3bec65a07343258ed8fd9d46483f032d866f (diff)
parent     b1394e745b9453dcb5b0671c205b770e87dedb87 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Radim Krčmář:
 "ARM:
   - A number of issues in the vgic discovered using SMATCH
   - An off-by-one in the stage-2 base address mask calculation (32-bit
     and 64-bit)
   - Fixes to single-step debugging of instructions that trap for other
     reasons, such as MMIO aborts
   - Printing unavailable hyp mode as an error
   - Potential spinlock deadlock in the vgic
   - Avoid calling vgic vcpu free more than once
   - Broken bit calculation for big endian systems

  s390:
   - SPDX tags
   - Fence storage key accesses from problem state
   - Make sure that irq_state.flags is not used in the future

  x86:
   - Intercept port 0x80 accesses to prevent host instability (CVE)
   - Use the userspace FPU context for the guest FPU (mainly an
     optimization that fixes a double use of the kernel FPU)
   - Do not leak one page per module load
   - Flush the APIC page address cache from the MMU invalidation
     notifiers"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  KVM: x86: fix APIC page invalidation
  KVM: s390: Fix skey emulation permission check
  KVM: s390: mark irq_state.flags as non-usable
  KVM: s390: Remove redundant license text
  KVM: s390: add SPDX identifiers to the remaining files
  KVM: VMX: fix page leak in hardware_setup()
  KVM: VMX: remove I/O port 0x80 bypass on Intel hosts
  x86,kvm: remove KVM emulator get_fpu / put_fpu
  x86,kvm: move qemu/guest FPU switching out to vcpu_run
  KVM: arm/arm64: Fix broken GICH_ELRSR big endian conversion
  KVM: arm/arm64: kvm_arch_destroy_vm cleanups
  KVM: arm/arm64: Fix spinlock acquisition in vgic_set_owner
  kvm: arm: don't treat unavailable HYP mode as an error
  KVM: arm/arm64: Avoid attempting to load timer vgic state without a vgic
  kvm: arm64: handle single-step of hyp emulated mmio instructions
  kvm: arm64: handle single-step during SError exceptions
  kvm: arm64: handle single-step of userspace mmio instructions
  kvm: arm64: handle single-stepping trapped instructions
  KVM: arm/arm64: debug: Introduce helper for single-step
  arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one
  ...
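The bulk of the x86 diff below is the guest FPU rework. As a rough sketch (simplified pseudocode, not the literal kernel code; error handling and the other work done in these functions is omitted), the guest FPU now travels with the KVM_RUN ioctl instead of with every guest entry and every emulator access:

    /* Simplified sketch of the new FPU lifetime (see the x86.c hunks below). */
    int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
            ...
            kvm_load_guest_fpu(vcpu);   /* save QEMU's user FPU state, load guest FPU */
            r = vcpu_run(vcpu);         /* may enter and exit the guest many times */
            kvm_put_guest_fpu(vcpu);    /* save guest FPU state, restore QEMU's user FPU */
            ...
    }

Previously the load/put pair bracketed each individual guest entry (vcpu_enter_guest / kvm_arch_vcpu_put) and the instruction emulator additionally took its own get_fpu/put_fpu references, which is the "double use of the kernel FPU" the pull message refers to.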
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h |  2
-rw-r--r--  arch/x86/include/asm/kvm_host.h    | 16
-rw-r--r--  arch/x86/kvm/emulate.c             | 24
-rw-r--r--  arch/x86/kvm/vmx.c                 |  6
-rw-r--r--  arch/x86/kvm/x86.c                 | 63
5 files changed, 47 insertions(+), 64 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 034caa1a084e..b24b1c8b3979 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -214,8 +214,6 @@ struct x86_emulate_ops {
void (*halt)(struct x86_emulate_ctxt *ctxt);
void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
- void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
- void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
int (*intercept)(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 977de5fb968b..516798431328 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ /*
+ * QEMU userspace and the guest each have their own FPU state.
+ * In vcpu_run, we switch between the user and guest FPU contexts.
+ * While running a VCPU, the VCPU thread will have the guest FPU
+ * context.
+ *
+ * Note that while the PKRU state lives inside the fpu registers,
+ * it is switched out separately at VMENTER and VMEXIT time. The
+ * "guest_fpu" state here contains the guest FPU context, with the
+ * host PKRU bits.
+ */
+ struct fpu user_fpu;
struct fpu guest_fpu;
+
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -1435,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
#endif /* _ASM_X86_KVM_HOST_H */
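The kvm_arch_mmu_notifier_invalidate_range() hook declared above is called from the generic MMU notifier path in virt/kvm/kvm_main.c, which is outside this arch/x86-limited diffstat. Roughly, the generic-side wiring added by the same fix looks like the following (a sketch for context, not part of the diff shown here):

    /* virt/kvm/kvm_main.c (sketch): forward the invalidation to the arch hook
     * under SRCU, so x86 can request an APIC access page reload. */
    static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
                                                  struct mm_struct *mm,
                                                  unsigned long start,
                                                  unsigned long end)
    {
            struct kvm *kvm = mmu_notifier_to_kvm(mn);
            int idx;

            idx = srcu_read_lock(&kvm->srcu);
            kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
            srcu_read_unlock(&kvm->srcu, idx);
    }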
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7d04d0c8008..abe74f779f9d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fninit");
- ctxt->ops->put_fpu(ctxt);
return X86EMUL_CONTINUE;
}
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstcw %0": "+m"(fcw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fcw;
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstsw %0": "+m"(fsw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fsw;
@@ -4001,12 +3987,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
- ctxt->ops->put_fpu(ctxt);
-
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -4049,8 +4031,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
if (size < __fxstate_size(16)) {
rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
@@ -4066,8 +4046,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
out:
- ctxt->ops->put_fpu(ctxt);
-
return rc;
}
@@ -5317,9 +5295,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
int rc;
- ctxt->ops->get_fpu(ctxt);
rc = asm_safe("fwait");
- ctxt->ops->put_fpu(ctxt);
if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4704aaf6d19e..8eba631c4dbd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6751,16 +6751,10 @@ static __init int hardware_setup(void)
goto out;
}
- vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
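For context on the vmx.c hunk above: when VMX I/O bitmaps are in use, bitmap A covers ports 0x0000-0x7fff with one bit per port, and a set bit forces a VM exit on any access to that port. The removed clear_bit(0x80, ...) used to give guests direct access to the POST/debug port 0x80; with every bit left set, port 0x80 now traps like any other port. A small standalone illustration of the bit-to-port mapping (an illustration only, not KVM code):

    #include <stdio.h>
    #include <string.h>

    #define IO_BITMAP_SIZE 4096   /* one 4 KiB page covers ports 0x0000-0x7fff */

    /* Returns nonzero if an access to 'port' would cause a VM exit. */
    static int io_port_traps(const unsigned char *bitmap, unsigned int port)
    {
            return (bitmap[port / 8] >> (port % 8)) & 1;
    }

    int main(void)
    {
            unsigned char bitmap_a[IO_BITMAP_SIZE];

            memset(bitmap_a, 0xff, sizeof(bitmap_a));   /* all ports trap */
            /* Old behaviour: clearing bit 0x80 gave the guest direct access:
             * bitmap_a[0x80 / 8] &= ~(1 << (0x80 % 8)); */
            printf("port 0x80 causes VM exit: %d\n", io_port_traps(bitmap_a, 0x80));
            return 0;
    }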
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eee8e7faf1af..faf843c9b916 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
- kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
}
@@ -5252,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
emul_to_vcpu(ctxt)->arch.halt_request = 1;
}
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_disable();
- kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_enable();
-}
-
static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -5340,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
.fix_hypercall = emulator_fix_hypercall,
- .get_fpu = emulator_get_fpu,
- .put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
@@ -6778,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
kvm_x86_ops->tlb_flush(vcpu);
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long apic_address;
+
+ /*
+ * The physical address of the APIC access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (start <= apic_address && apic_address < end)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
@@ -6952,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
- kvm_load_guest_fpu(vcpu);
/*
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -7297,12 +7296,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
}
+ kvm_load_guest_fpu(vcpu);
+
if (unlikely(vcpu->arch.complete_userspace_io)) {
int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
vcpu->arch.complete_userspace_io = NULL;
r = cui(vcpu);
if (r <= 0)
- goto out;
+ goto out_fpu;
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
@@ -7311,6 +7312,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
else
r = vcpu_run(vcpu);
+out_fpu:
+ kvm_put_guest_fpu(vcpu);
out:
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -7704,32 +7707,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
+/* Swap (qemu) user FPU context for the guest FPU context. */
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_fpu_loaded)
- return;
-
- /*
- * Restore all possible states in the guest,
- * and assume host would use all available bits.
- * Guest xcr0 would be loaded later.
- */
- vcpu->guest_fpu_loaded = 1;
- __kernel_fpu_begin();
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
~XFEATURE_MASK_PKRU);
+ preempt_enable();
trace_kvm_fpu(1);
}
+/* When vcpu_run ends, restore user space FPU context. */
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->guest_fpu_loaded)
- return;
-
- vcpu->guest_fpu_loaded = 0;
+ preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- __kernel_fpu_end();
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -7846,7 +7842,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
* Avoid having the INIT path from kvm_apic_has_events() run with the
* guest FPU loaded, which would not let userspace fix the state.
*/
- kvm_put_guest_fpu(vcpu);
+ if (init_event)
+ kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
XFEATURE_MASK_BNDREGS);
if (mpx_state_buffer)
@@ -7855,6 +7852,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
XFEATURE_MASK_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+ if (init_event)
+ kvm_load_guest_fpu(vcpu);
}
if (!init_event) {