author     Michael Ellerman <mpe@ellerman.id.au>    2017-07-31 20:20:29 +1000
committer  Michael Ellerman <mpe@ellerman.id.au>    2017-07-31 20:20:29 +1000
commit     bb272221e9db79f13d454e1f3fb6b05013be985e (patch)
tree       36f4acc50e3fabac71fadd34c720c0a6011db470 /arch/x86
parent     253fd51e2f533552ae35a0c661705da6c4842c1b (diff)
parent     5771a8c08880cdca3bfb4a3fc6d309d6bba20877 (diff)
Merge tag 'v4.13-rc1' into fixes
The fixes branch is based off a random pre-rc1 commit, because we had
some fixes that needed to go in before rc1 was released.
However, we now need to fix some code that went in after that point, but
before rc1, so merge rc1 to get that code into fixes so we can fix it!
Diffstat (limited to 'arch/x86')
33 files changed, 395 insertions, 217 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 94a18681353d..781521b7cf9e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,7 @@ config X86 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MMIO_FLUSH @@ -162,6 +163,7 @@ config X86 select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 00241c815524..a0838ab929f2 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -411,3 +411,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putstr("done.\nBooting the kernel.\n"); return output; } + +void fortify_panic(const char *name) +{ + error("detected buffer overflow"); +} diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index fc61739150e7..f960a043cdeb 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2701e5f8145b..ca3c48c0872f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -286,6 +286,7 @@ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index d2ff779f347e..796ff6c1aa53 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -33,7 +33,7 @@ #ifdef CONFIG_X86_32 -extern unsigned long asmlinkage efi_call_phys(void *, ...); +extern asmlinkage unsigned long efi_call_phys(void *, ...); #define arch_efi_call_virt_setup() kernel_fpu_begin() #define arch_efi_call_virt_teardown() kernel_fpu_end() @@ -52,7 +52,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define EFI_LOADER_SIGNATURE "EL64" -extern u64 asmlinkage efi_call(void *fp, ...); +extern asmlinkage u64 efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index e8ab9a46bc68..1c18d83d3f09 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -245,12 +245,13 @@ extern int force_personality32; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. 
We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 722d0e568863..fde36f189836 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -23,6 +23,7 @@ struct x86_exception { u16 error_code; bool nested_page_fault; u64 address; /* cr2 or nested page fault gpa */ + u8 async_page_fault; }; /* diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1588e9e3dc01..87ac4fba6d8e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic { DECLARE_BITMAP(auto_eoi_bitmap, 256); DECLARE_BITMAP(vec_bitmap, 256); bool active; + bool dont_zero_synic_pages; }; /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { + u32 vp_index; u64 hv_vapic; s64 runtime_offset; struct kvm_vcpu_hv_synic synic; @@ -549,6 +551,7 @@ struct kvm_vcpu_arch { bool reinject; u8 nr; u32 error_code; + u8 nested_apf; } exception; struct kvm_queued_interrupt { @@ -649,6 +652,9 @@ struct kvm_vcpu_arch { u64 msr_val; u32 id; bool send_user_only; + u32 host_apf_reason; + unsigned long nested_apf_token; + bool delivery_as_pf_vmexit; } apf; /* OSVW MSRs (AMD only) */ @@ -803,6 +809,7 @@ struct kvm_arch { int audit_point; #endif + bool backwards_tsc_observed; bool boot_vcpu_runs_old_kvmclock; u32 bsp_vcpu_id; @@ -952,9 +959,7 @@ struct kvm_x86_ops { unsigned char *hypercall_addr); void (*set_irq)(struct kvm_vcpu *vcpu); void (*set_nmi)(struct kvm_vcpu *vcpu); - void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code, - bool reinject); + void (*queue_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index dcbd9bcce714..8abedf1d650e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -74,6 +74,7 @@ static __always_inline void boot_init_stack_canary(void) get_random_bytes(&canary, sizeof(canary)); tsc = rdtsc(); canary += tsc + (tsc << 32UL); + canary &= CANARY_MASK; current->stack_canary = canary; #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 3d3e8353ee5c..e9ee84873de5 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, } #define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #ifdef CONFIG_X86_USE_3DNOW #include <asm/mmx.h> @@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) #endif #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, 
const void *src, size_t n); +extern int memcmp(const void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #define memcmp __builtin_memcmp +#endif #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *cs, int c, size_t count); @@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #if (__GNUC__ >= 4) #define memset(s, c, count) __builtin_memset(s, c, count) #else @@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, (count)) \ : __memset((s), (c), (count))) #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ /* * find the first occurrence of byte 'c', or 1 past the area if none diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 1f22bc277c45..2a8c822de1fc 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); +#ifndef CONFIG_FORTIFY_SOURCE #ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ @@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len); */ #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); @@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct); #define memcpy(dst, src, len) __memcpy(dst, src, len) #define memmove(dst, src, len) __memmove(dst, src, len) #define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. 
*/ +#endif + #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 14824fc78f7e..58fffe79e417 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 event_inj; u32 event_inj_err; u64 nested_cr3; - u64 lbr_ctl; + u64 virt_ext; u32 clean; u32 reserved_5; u64 next_rip; @@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) +#define LBR_CTL_ENABLE_MASK BIT_ULL(0) +#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) + #define SVM_INTERRUPT_SHADOW_MASK 1 #define SVM_IOIO_STR_SHIFT 2 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 476ea27f490b..30269dafec47 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -535,9 +535,6 @@ struct __large_struct { unsigned long buf[100]; }; #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#define __get_user_unaligned __get_user -#define __put_user_unaligned __put_user - /* * {get|put}_user_try and catch * diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index cff0bb6556f8..a965e5b0d328 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -67,6 +67,7 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) +#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index c73c9fb281e1..d6f387780849 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -19,7 +19,7 @@ #include <linux/init.h> #include <linux/delay.h> -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh) { return (u64)(cpu_khz) * 1000 * watchdog_thresh; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 22217ece26c8..44404e2307bb 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced, bufp += sizeof(Elf64_Phdr); phdr->p_type = PT_NOTE; phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); - phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note); + phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; (ehdr->e_phnum)++; #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 43e10d6fdbed..71c17a5be983 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void) #ifdef CONFIG_PREEMPT pa |= KVM_ASYNC_PF_SEND_ALWAYS; #endif - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); + pa |= KVM_ASYNC_PF_ENABLED; + + /* Async page fault support for L1 hypervisor is optional */ + if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, + (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ebae57ac5902..2695a34fa1c5 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, 
return 0; } -static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) +static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + if (vpidx < KVM_MAX_VCPUS) + vcpu = kvm_get_vcpu(kvm, vpidx); + if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) + return vcpu; + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) + return vcpu; + return NULL; +} + +static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx) { struct kvm_vcpu *vcpu; struct kvm_vcpu_hv_synic *synic; - if (vcpu_id >= atomic_read(&kvm->online_vcpus)) - return NULL; - vcpu = kvm_get_vcpu(kvm, vcpu_id); + vcpu = get_vcpu_by_vpidx(kvm, vpidx); if (!vcpu) return NULL; synic = vcpu_to_synic(vcpu); @@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, synic->version = data; break; case HV_X64_MSR_SIEFP: - if (data & HV_SYNIC_SIEFP_ENABLE) + if ((data & HV_SYNIC_SIEFP_ENABLE) && !host && + !synic->dont_zero_synic_pages) if (kvm_clear_guest(vcpu->kvm, data & PAGE_MASK, PAGE_SIZE)) { ret = 1; @@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, synic_exit(synic, msr); break; case HV_X64_MSR_SIMP: - if (data & HV_SYNIC_SIMP_ENABLE) + if ((data & HV_SYNIC_SIMP_ENABLE) && !host && + !synic->dont_zero_synic_pages) if (kvm_clear_guest(vcpu->kvm, data & PAGE_MASK, PAGE_SIZE)) { ret = 1; @@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) return ret; } -int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint) { struct kvm_vcpu_hv_synic *synic; - synic = synic_get(kvm, vcpu_id); + synic = synic_get(kvm, vpidx); if (!synic) return -EINVAL; @@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) kvm_hv_notify_acked_sint(vcpu, i); } -static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) +static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi) { struct kvm_vcpu_hv_synic *synic; - synic = synic_get(kvm, vcpu_id); + synic = synic_get(kvm, vpidx); if (!synic) return -EINVAL; @@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) stimer_init(&hv_vcpu->stimer[i], i); } -int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) +void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + + hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); +} + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) { + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + /* * Hyper-V SynIC auto EOI SINT's are * not compatible with APICV, so deactivate APICV */ kvm_vcpu_deactivate_apicv(vcpu); - vcpu_to_synic(vcpu)->active = true; + synic->active = true; + synic->dont_zero_synic_pages = dont_zero_synic_pages; return 0; } @@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; switch (msr) { + case HV_X64_MSR_VP_INDEX: + if (!host) + return 1; + hv->vp_index = (u32)data; + break; case HV_X64_MSR_APIC_ASSIST_PAGE: { u64 gfn; unsigned long addr; @@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; switch (msr) { - case HV_X64_MSR_VP_INDEX: { - int r; - struct kvm_vcpu *v; - - kvm_for_each_vcpu(r, v, vcpu->kvm) { - if (v == vcpu) { - data = r; - break; - } - } + case 
HV_X64_MSR_VP_INDEX: + data = hv->vp_index; break; - } case HV_X64_MSR_EOI: return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); case HV_X64_MSR_ICR: diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index cd1119538add..e637631a9574 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu); void kvm_hv_irq_routing_update(struct kvm *kvm); int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); -int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages); void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index a78b445ce411..af192895b1fc 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm) struct kvm_pit *pit = kvm->arch.vpit; if (pit) { + mutex_lock(&kvm->slots_lock); kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev); + mutex_unlock(&kvm->slots_lock); kvm_pit_set_reinject(pit, false); hrtimer_cancel(&pit->pit_state.timer); kthread_destroy_worker(pit->worker); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aafd399cf8c6..9b1dd114956a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -46,6 +46,7 @@ #include <asm/io.h> #include <asm/vmx.h> #include <asm/kvm_page_track.h> +#include "trace.h" /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) kvm_event_needs_reinjection(vcpu))) return false; - if (is_guest_mode(vcpu)) + if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) return false; return kvm_x86_ops->interrupt_allowed(vcpu); @@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; } +int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 fault_address, char *insn, int insn_len, + bool need_unprotect) +{ + int r = 1; + + switch (vcpu->arch.apf.host_apf_reason) { + default: + trace_kvm_page_fault(fault_address, error_code); + + if (need_unprotect && kvm_event_needs_reinjection(vcpu)) + kvm_mmu_unprotect_page_virt(vcpu, fault_address); + r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, + insn_len); + break; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + vcpu->arch.apf.host_apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wait(fault_address); + local_irq_enable(); + break; + case KVM_PV_REASON_PAGE_READY: + vcpu->arch.apf.host_apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wake(fault_address); + local_irq_enable(); + break; + } + return r; +} +EXPORT_SYMBOL_GPL(kvm_handle_page_fault); + static bool check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index a276834950c1..d7d248a000dd 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); +int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 
fault_address, char *insn, int insn_len, + bool need_unprotect); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 905ea6052517..4d8141e533c3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -194,7 +194,6 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; - u32 apf_reason; /* cached guest cpuid flags for faster access */ bool nrips_enabled : 1; @@ -277,6 +276,10 @@ static int avic; module_param(avic, int, S_IRUGO); #endif +/* enable/disable Virtual VMLOAD VMSAVE */ +static int vls = true; +module_param(vls, int, 0444); + /* AVIC VM ID bit masks and lock */ static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); static DEFINE_SPINLOCK(avic_vm_id_lock); @@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm_set_interrupt_shadow(vcpu, 0); } -static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code, - bool reinject) +static void svm_queue_exception(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + unsigned nr = vcpu->arch.exception.nr; + bool has_error_code = vcpu->arch.exception.has_error_code; + bool reinject = vcpu->arch.exception.reinject; + u32 error_code = vcpu->arch.exception.error_code; /* * If we are within a nested VM we'd better #VMEXIT and let the guest @@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm) { u32 *msrpm = svm->msrpm; - svm->vmcb->control.lbr_ctl = 1; + svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); @@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) { u32 *msrpm = svm->msrpm; - svm->vmcb->control.lbr_ctl = 0; + svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); @@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void) } } + if (vls) { + if (!npt_enabled || + !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) || + !IS_ENABLED(CONFIG_X86_64)) { + vls = false; + } else { + pr_info("Virtual VMLOAD VMSAVE supported\n"); + } + } + return 0; err: @@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm) if (avic) avic_init_vmcb(svm); + /* + * If hardware supports Virtual VMLOAD VMSAVE then enable it + * in VMCB and clear intercepts to avoid #VMEXIT. 
+ */ + if (vls) { + clr_intercept(svm, INTERCEPT_VMLOAD); + clr_intercept(svm, INTERCEPT_VMSAVE); + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + } + mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -2096,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) static int pf_interception(struct vcpu_svm *svm) { u64 fault_address = svm->vmcb->control.exit_info_2; - u64 error_code; - int r = 1; + u64 error_code = svm->vmcb->control.exit_info_1; - switch (svm->apf_reason) { - default: - error_code = svm->vmcb->control.exit_info_1; - - trace_kvm_page_fault(fault_address, error_code); - if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) - kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); - r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, + return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address, svm->vmcb->control.insn_bytes, - svm->vmcb->control.insn_len); - break; - case KVM_PV_REASON_PAGE_NOT_PRESENT: - svm->apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wait(fault_address); - local_irq_enable(); - break; - case KVM_PV_REASON_PAGE_READY: - svm->apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wake(fault_address); - local_irq_enable(); - break; - } - return r; + svm->vmcb->control.insn_len, !npt_enabled); } static int db_interception(struct vcpu_svm *svm) @@ -2267,7 +2269,7 @@ static int io_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ - int size, in, string; + int size, in, string, ret; unsigned port; ++svm->vcpu.stat.io_exits; @@ -2279,10 +2281,16 @@ static int io_interception(struct vcpu_svm *svm) port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; svm->next_rip = svm->vmcb->control.exit_info_2; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); - return in ? kvm_fast_pio_in(vcpu, size, port) - : kvm_fast_pio_out(vcpu, size, port); + /* + * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. 
+ */ + if (in) + return kvm_fast_pio_in(vcpu, size, port) && ret; + else + return kvm_fast_pio_out(vcpu, size, port) && ret; } static int nmi_interception(struct vcpu_svm *svm) @@ -2415,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, if (!is_guest_mode(&svm->vcpu)) return 0; + vmexit = nested_svm_intercept(svm); + if (vmexit != NESTED_EXIT_DONE) + return 0; + svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = error_code; - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; - - vmexit = nested_svm_intercept(svm); - if (vmexit == NESTED_EXIT_DONE) - svm->nested.exit_required = true; + if (svm->vcpu.arch.exception.nested_apf) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; + else + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + svm->nested.exit_required = true; return vmexit; } @@ -2598,7 +2610,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) break; case SVM_EXIT_EXCP_BASE + PF_VECTOR: /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->apf_reason == 0) + if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) return NESTED_EXIT_HOST; break; default: @@ -2645,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } /* async page fault always cause vmexit */ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && - svm->apf_reason != 0) + svm->vcpu.arch.exception.nested_apf != 0) vmexit = NESTED_EXIT_DONE; break; } @@ -2702,7 +2714,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr dst->event_inj = from->event_inj; dst->event_inj_err = from->event_inj_err; dst->nested_cr3 = from->nested_cr3; - dst->lbr_ctl = from->lbr_ctl; + dst->virt_ext = from->virt_ext; } static int nested_svm_vmexit(struct vcpu_svm *svm) @@ -3008,7 +3020,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) /* We don't want to see VMMCALLs from a nested guest */ clr_intercept(svm, INTERCEPT_VMMCALL); - svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; + svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; @@ -3055,6 +3067,7 @@ static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; struct page *page; + int ret; if (nested_svm_check_permissions(svm)) return 1; @@ -3064,18 +3077,19 @@ static int vmload_interception(struct vcpu_svm *svm) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); - return 1; + return ret; } static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; struct page *page; + int ret; if (nested_svm_check_permissions(svm)) return 1; @@ -3085,12 +3099,12 @@ static int vmsave_interception(struct vcpu_svm *svm) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); - return 1; + return ret; } static int vmrun_interception(struct vcpu_svm *svm) @@ -3123,25 +3137,29 @@ failed: static int stgi_interception(struct vcpu_svm *svm) { + int ret; + if (nested_svm_check_permissions(svm)) return 1; 
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); enable_gif(svm); - return 1; + return ret; } static int clgi_interception(struct vcpu_svm *svm) { + int ret; + if (nested_svm_check_permissions(svm)) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); disable_gif(svm); @@ -3152,7 +3170,7 @@ static int clgi_interception(struct vcpu_svm *svm) mark_dirty(svm->vmcb, VMCB_INTR); } - return 1; + return ret; } static int invlpga_interception(struct vcpu_svm *svm) @@ -3166,8 +3184,7 @@ static int invlpga_interception(struct vcpu_svm *svm) kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - return 1; + return kvm_skip_emulated_instruction(&svm->vcpu); } static int skinit_interception(struct vcpu_svm *svm) @@ -3190,7 +3207,7 @@ static int xsetbv_interception(struct vcpu_svm *svm) if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + return kvm_skip_emulated_instruction(&svm->vcpu); } return 1; @@ -3286,8 +3303,7 @@ static int invlpg_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); - skip_emulated_instruction(&svm->vcpu); - return 1; + return kvm_skip_emulated_instruction(&svm->vcpu); } static int emulate_on_interception(struct vcpu_svm *svm) @@ -3437,9 +3453,7 @@ static int dr_interception(struct vcpu_svm *svm) kvm_register_write(&svm->vcpu, reg, val); } - skip_emulated_instruction(&svm->vcpu); - - return 1; + return kvm_skip_emulated_instruction(&svm->vcpu); } static int cr8_write_interception(struct vcpu_svm *svm) @@ -3562,6 +3576,7 @@ static int rdmsr_interception(struct vcpu_svm *svm) if (svm_get_msr(&svm->vcpu, &msr_info)) { trace_kvm_msr_read_ex(ecx); kvm_inject_gp(&svm->vcpu, 0); + return 1; } else { trace_kvm_msr_read(ecx, msr_info.data); @@ -3570,9 +3585,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, msr_info.data >> 32); svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - skip_emulated_instruction(&svm->vcpu); + return kvm_skip_emulated_instruction(&svm->vcpu); } - return 1; } static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) @@ -3698,11 +3712,11 @@ static int wrmsr_interception(struct vcpu_svm *svm) if (kvm_set_msr(&svm->vcpu, &msr)) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(&svm->vcpu, 0); + return 1; } else { trace_kvm_msr_write(ecx, data); - skip_emulated_instruction(&svm->vcpu); + return kvm_skip_emulated_instruction(&svm->vcpu); } - return 1; } static int msr_interception(struct vcpu_svm *svm) @@ -3731,8 +3745,7 @@ static int pause_interception(struct vcpu_svm *svm) static int nop_interception(struct vcpu_svm *svm) { - skip_emulated_instruction(&(svm->vcpu)); - return 1; + return kvm_skip_emulated_instruction(&(svm->vcpu)); } static int monitor_interception(struct vcpu_svm *svm) @@ -4117,7 +4130,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); pr_err("%-20s%08x\n", "event_inj:", control->event_inj); pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); - pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + 
pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); @@ -4965,7 +4978,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) - svm->apf_reason = kvm_read_and_reset_pf_reason(); + svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f76efad248ab..84e62acf2dd8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) * KVM wants to inject page-faults which it got to the guest. This function * checks whether in a nested guest, we need to inject them to L1 or L2. */ -static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) +static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned int nr = vcpu->arch.exception.nr; - if (!(vmcs12->exception_bitmap & (1u << nr))) + if (!((vmcs12->exception_bitmap & (1u << nr)) || + (nr == PF_VECTOR && vcpu->arch.exception.nested_apf))) return 0; + if (vcpu->arch.exception.nested_apf) { + vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, + PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | + INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, + vcpu->arch.apf.nested_apf_token); + return 1; + } + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); return 1; } -static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code, - bool reinject) +static void vmx_queue_exception(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned nr = vcpu->arch.exception.nr; + bool has_error_code = vcpu->arch.exception.has_error_code; + bool reinject = vcpu->arch.exception.reinject; + u32 error_code = vcpu->arch.exception.error_code; u32 intr_info = nr | INTR_INFO_VALID_MASK; if (!reinject && is_guest_mode(vcpu) && - nested_vmx_check_exception(vcpu, nr)) + nested_vmx_check_exception(vcpu)) return; if (has_error_code) { @@ -3764,6 +3777,25 @@ static void free_kvm_area(void) } } +enum vmcs_field_type { + VMCS_FIELD_TYPE_U16 = 0, + VMCS_FIELD_TYPE_U64 = 1, + VMCS_FIELD_TYPE_U32 = 2, + VMCS_FIELD_TYPE_NATURAL_WIDTH = 3 +}; + +static inline int vmcs_field_type(unsigned long field) +{ + if (0x1 & field) /* the *_HIGH fields are all 32 bit */ + return VMCS_FIELD_TYPE_U32; + return (field >> 13) & 0x3 ; +} + +static inline int vmcs_field_readonly(unsigned long field) +{ + return (((field >> 10) & 0x3) == 1); +} + static void init_vmcs_shadow_fields(void) { int i, j; @@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void) /* shadowed fields guest access without vmexit */ for (i = 0; i < max_shadow_read_write_fields; i++) { - clear_bit(shadow_read_write_fields[i], - vmx_vmwrite_bitmap); - clear_bit(shadow_read_write_fields[i], - vmx_vmread_bitmap); + unsigned long field = shadow_read_write_fields[i]; + + clear_bit(field, vmx_vmwrite_bitmap); + clear_bit(field, vmx_vmread_bitmap); + if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) { + clear_bit(field + 1, vmx_vmwrite_bitmap); + 
clear_bit(field + 1, vmx_vmread_bitmap); + } + } + for (i = 0; i < max_shadow_read_only_fields; i++) { + unsigned long field = shadow_read_only_fields[i]; + + clear_bit(field, vmx_vmread_bitmap); + if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) + clear_bit(field + 1, vmx_vmread_bitmap); } - for (i = 0; i < max_shadow_read_only_fields; i++) - clear_bit(shadow_read_only_fields[i], - vmx_vmread_bitmap); } static __init int alloc_kvm_area(void) @@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) return true; } +static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); +} + static int init_rmode_tss(struct kvm *kvm) { gfn_t fn; @@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu) } if (is_page_fault(intr_info)) { - /* EPT won't cause page fault directly */ - BUG_ON(enable_ept); cr2 = vmcs_readl(EXIT_QUALIFICATION); - trace_kvm_page_fault(cr2, error_code); - - if (kvm_event_needs_reinjection(vcpu)) - kvm_mmu_unprotect_page_virt(vcpu, cr2); - return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); + /* EPT won't cause page fault directly */ + WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); + return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0, + true); } ex_no = intr_info & INTR_INFO_VECTOR_MASK; @@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu) return nested_vmx_run(vcpu, false); } -enum vmcs_field_type { - VMCS_FIELD_TYPE_U16 = 0, - VMCS_FIELD_TYPE_U64 = 1, - VMCS_FIELD_TYPE_U32 = 2, - VMCS_FIELD_TYPE_NATURAL_WIDTH = 3 -}; - -static inline int vmcs_field_type(unsigned long field) -{ - if (0x1 & field) /* the *_HIGH fields are all 32 bit */ - return VMCS_FIELD_TYPE_U32; - return (field >> 13) & 0x3 ; -} - -static inline int vmcs_field_readonly(unsigned long field) -{ - return (((field >> 10) & 0x3) == 1); -} - /* * Read a vmcs12 field. 
Since these can have varying lengths and we return * one type, we chose the biggest type (u64) and zero-extend the return value @@ -8014,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) if (is_nmi(intr_info)) return false; else if (is_page_fault(intr_info)) - return enable_ept; + return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; else if (is_no_device(intr_info) && !(vmcs12->guest_cr0 & X86_CR0_TS)) return false; @@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) exit_reason != EXIT_REASON_TASK_SWITCH)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.ndata = 2; + vcpu->run->internal.ndata = 3; vcpu->run->internal.data[0] = vectoring_info; vcpu->run->internal.data[1] = exit_reason; + vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; + if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { + vcpu->run->internal.ndata++; + vcpu->run->internal.data[3] = + vmcs_read64(GUEST_PHYSICAL_ADDRESS); + } return 0; } @@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { - u32 exit_intr_info; + u32 exit_intr_info = 0; + u16 basic_exit_reason = (u16)vmx->exit_reason; - if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY - || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) + if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY + || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI)) return; - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - exit_intr_info = vmx->exit_intr_info; + if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + vmx->exit_intr_info = exit_intr_info; + + /* if exit due to PF check for async PF */ + if (is_page_fault(exit_intr_info)) + vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); /* Handle machine checks before interrupts are enabled */ - if (is_machine_check(exit_intr_info)) + if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY || + is_machine_check(exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ @@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); } +static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return 0; + + if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) || + !page_address_valid(vcpu, vmcs12->io_bitmap_b)) + return -EINVAL; + + return 0; +} + static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int maxphyaddr; - u64 addr; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return 0; - if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) { - WARN_ON(1); - return -EINVAL; - } - maxphyaddr = cpuid_maxphyaddr(vcpu); - - if (!PAGE_ALIGNED(vmcs12->msr_bitmap) || - ((addr + PAGE_SIZE) >> maxphyaddr)) + if (!page_address_valid(vcpu, vmcs12->msr_bitmap)) return -EINVAL; return 0; @@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; 
@@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) return 1; } - vmcs12->launch_state = 1; - /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet @@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { + vmcs12->launch_state = 1; + /* vm_entry_intr_info_field is cleared on exit. Emulate this * instead of reading the real value. */ vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c7266f7766d..5b8f07889f6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); -static bool __read_mostly backwards_tsc_observed = false; - #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -452,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { ++vcpu->stat.pf_guest; - vcpu->arch.cr2 = fault->address; + vcpu->arch.exception.nested_apf = + is_guest_mode(vcpu) && fault->async_page_fault; + if (vcpu->arch.exception.nested_apf) + vcpu->arch.apf.nested_apf_token = fault->address; + else + vcpu->arch.cr2 = fault->address; kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); @@ -1719,7 +1722,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) &ka->master_cycle_now); ka->use_master_clock = host_tsc_clocksource && vcpus_matched - && !backwards_tsc_observed + && !ka->backwards_tsc_observed && !ka->boot_vcpu_runs_old_kvmclock; if (ka->use_master_clock) @@ -2060,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 2:5 are reserved, Should be zero */ - if (data & 0x3c) + /* Bits 3:5 are reserved, Should be zero */ + if (data & 0x38) return 1; vcpu->arch.apf.msr_val = data; @@ -2077,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); + vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; kvm_async_pf_wakeup_all(vcpu); return 0; } @@ -2661,6 +2665,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: case KVM_CAP_HYPERV_SYNIC: + case KVM_CAP_HYPERV_SYNIC2: + case KVM_CAP_HYPERV_VP_INDEX: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -3384,10 +3390,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return -EINVAL; switch (cap->cap) { + case KVM_CAP_HYPERV_SYNIC2: + if (cap->args[0]) + return -EINVAL; case KVM_CAP_HYPERV_SYNIC: if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; - return kvm_hv_activate_synic(vcpu); + return kvm_hv_activate_synic(vcpu, cap->cap == + KVM_CAP_HYPERV_SYNIC2); default: return -EINVAL; } @@ -4188,9 +4198,15 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; r = 0; + /* + * TODO: userspace has to take care of races with VCPU_RUN, so + * kvm_gen_update_masterclock() can be cut down to locked + * pvclock_update_vm_gtod_copy(). 
+ */ + kvm_gen_update_masterclock(kvm); now_ns = get_kvmclock_ns(kvm); kvm->arch.kvmclock_offset += user_ns.clock - now_ns; - kvm_gen_update_masterclock(kvm); + kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); break; } case KVM_GET_CLOCK: { @@ -6347,10 +6363,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_update_dr7(vcpu); } - kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, - vcpu->arch.exception.has_error_code, - vcpu->arch.exception.error_code, - vcpu->arch.exception.reinject); + kvm_x86_ops->queue_exception(vcpu); return 0; } @@ -7676,6 +7689,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) struct msr_data msr; struct kvm *kvm = vcpu->kvm; + kvm_hv_vcpu_postcreate(vcpu); + if (vcpu_load(vcpu)) return; msr.data = 0x0; @@ -7829,8 +7844,8 @@ int kvm_arch_hardware_enable(void) */ if (backwards_tsc) { u64 delta_cyc = max_tsc - local_tsc; - backwards_tsc_observed = true; list_for_each_entry(kvm, &vm_list, vm_list) { + kvm->arch.backwards_tsc_observed = true; kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.tsc_offset_adjustment += delta_cyc; vcpu->arch.last_host_tsc = local_tsc; @@ -8576,6 +8591,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, fault.error_code = 0; fault.nested_page_fault = false; fault.address = work->arch.token; + fault.async_page_fault = true; kvm_inject_page_fault(vcpu, &fault); } } @@ -8598,6 +8614,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, fault.error_code = 0; fault.nested_page_fault = false; fault.address = work->arch.token; + fault.async_page_fault = true; kvm_inject_page_fault(vcpu, &fault); } vcpu->arch.apf.halted = false; diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index cad12634d6bd..2eab7d0bfedd 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -6,7 +6,7 @@ __visible void *memcpy(void *to, const void *from, size_t n) { -#ifdef CONFIG_X86_USE_3DNOW +#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE) return __memcpy3d(to, from, n); #else return __memcpy(to, from, n); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 88215ac16b24..02c9d7553409 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -23,12 +23,7 @@ static int __init map_range(struct range *range) start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); - /* - * end + 1 here is intentional. We check several shadow bytes in advance - * to slightly speed up fastpath. In some rare cases we could cross - * boundary of mapped shadow, so we just map some more here. - */ - return vmemmap_populate(start, end + 1, NUMA_NO_NODE); + return vmemmap_populate(start, end, NUMA_NO_NODE); } static void __init clear_pgds(unsigned long start, diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 797295e792b2..229d04a83f85 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -92,13 +92,18 @@ unsigned long arch_mmap_rnd(void) static unsigned long mmap_base(unsigned long rnd, unsigned long task_size) { unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; unsigned long gap_min, gap_max; + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; + /* * Top of mmap area (just below the process stack). * Leave an at least ~128 MB hole with possible stack randomization. 
*/ - gap_min = SIZE_128M + stack_maxrandom_size(task_size); + gap_min = SIZE_128M; gap_max = (task_size / 6) * 5; if (gap < gap_min) diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 00f54a91bb4b..28775f55bde2 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -26,6 +26,7 @@ int save_i387_registers(int pid, unsigned long *fp_regs) int save_fp_registers(int pid, unsigned long *fp_regs) { +#ifdef PTRACE_GETREGSET struct iovec iov; if (have_xstate_support) { @@ -34,9 +35,9 @@ int save_fp_registers(int pid, unsigned long *fp_regs) if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) return -errno; return 0; - } else { + } else +#endif return save_i387_registers(pid, fp_regs); - } } int restore_i387_registers(int pid, unsigned long *fp_regs) @@ -48,6 +49,7 @@ int restore_i387_registers(int pid, unsigned long *fp_regs) int restore_fp_registers(int pid, unsigned long *fp_regs) { +#ifdef PTRACE_SETREGSET struct iovec iov; if (have_xstate_support) { @@ -56,9 +58,9 @@ int restore_fp_registers(int pid, unsigned long *fp_regs) if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) return -errno; return 0; - } else { + } else +#endif return restore_i387_registers(pid, fp_regs); - } } #ifdef __i386__ @@ -122,6 +124,7 @@ int put_fp_registers(int pid, unsigned long *regs) void arch_init_registers(int pid) { +#ifdef PTRACE_GETREGSET struct _xstate fp_regs; struct iovec iov; @@ -129,6 +132,7 @@ void arch_init_registers(int pid) iov.iov_len = sizeof(struct _xstate); if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0) have_xstate_support = 1; +#endif } #endif diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S index b766792c9933..39053192918d 100644 --- a/arch/x86/um/setjmp_32.S +++ b/arch/x86/um/setjmp_32.S @@ -16,9 +16,9 @@ .text .align 4 - .globl setjmp - .type setjmp, @function -setjmp: + .globl kernel_setjmp + .type kernel_setjmp, @function +kernel_setjmp: #ifdef _REGPARM movl %eax,%edx #else @@ -35,13 +35,13 @@ setjmp: movl %ecx,20(%edx) # Return address ret - .size setjmp,.-setjmp + .size kernel_setjmp,.-kernel_setjmp .text .align 4 - .globl longjmp - .type longjmp, @function -longjmp: + .globl kernel_longjmp + .type kernel_longjmp, @function +kernel_longjmp: #ifdef _REGPARM xchgl %eax,%edx #else @@ -55,4 +55,4 @@ longjmp: movl 16(%edx),%edi jmp *20(%edx) - .size longjmp,.-longjmp + .size kernel_longjmp,.-kernel_longjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S index 45f547b4043e..c56942e1a38c 100644 --- a/arch/x86/um/setjmp_64.S +++ b/arch/x86/um/setjmp_64.S @@ -18,9 +18,9 @@ .text .align 4 - .globl setjmp - .type setjmp, @function -setjmp: + .globl kernel_setjmp + .type kernel_setjmp, @function +kernel_setjmp: pop %rsi # Return address, and adjust the stack xorl %eax,%eax # Return value movq %rbx,(%rdi) @@ -34,13 +34,13 @@ setjmp: movq %rsi,56(%rdi) # Return address ret - .size setjmp,.-setjmp + .size kernel_setjmp,.-kernel_setjmp .text .align 4 - .globl longjmp - .type longjmp, @function -longjmp: + .globl kernel_longjmp + .type kernel_longjmp, @function +kernel_longjmp: movl %esi,%eax # Return value (int) movq (%rdi),%rbx movq 8(%rdi),%rsp @@ -51,4 +51,4 @@ longjmp: movq 48(%rdi),%r15 jmp *56(%rdi) - .size longjmp,.-longjmp + .size kernel_longjmp,.-kernel_longjmp diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index cb3c22370cf5..ae4cd58c0c7a 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -5,7 +5,7 @@ #include <sys/mman.h> 
#include <sys/user.h> #define __FRAME_OFFSETS -#include <asm/ptrace.h> +#include <linux/ptrace.h> #include <asm/types.h> #ifdef __i386__ @@ -50,7 +50,11 @@ void foo(void) DEFINE(HOST_GS, GS); DEFINE(HOST_ORIG_AX, ORIG_EAX); #else +#if defined(PTRACE_GETREGSET) && defined(PTRACE_SETREGSET) DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long)); +#else + DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); +#endif DEFINE_LONGS(HOST_BX, RBX); DEFINE_LONGS(HOST_CX, RCX); DEFINE_LONGS(HOST_DI, RDI); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1d7a7213a310..cab28cf2cffb 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2693,8 +2693,8 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); phys_addr_t paddr_vmcoreinfo_note(void) { if (xen_pv_domain()) - return virt_to_machine(&vmcoreinfo_note).maddr; + return virt_to_machine(vmcoreinfo_note).maddr; else - return __pa_symbol(&vmcoreinfo_note); + return __pa(vmcoreinfo_note); } #endif /* CONFIG_KEXEC_CORE */ |