From 6c6c5e0311c83ffe75e14260fb83e05e21e1d488 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 13 Jan 2017 18:59:04 +0100 Subject: KVM: VMX: downgrade warning on unexpected exit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We never needed the call trace and we better rate-limit if it can be triggered by a guest. Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e82ee1e699..e7ec88961b1a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8501,7 +8501,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", + exit_reason); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -- cgit From 3863dff0c3dd72984395c93b12383b393c5c3989 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 24 Jan 2017 14:06:48 +0100 Subject: kvm: fix usage of uninit spinlock in avic_vm_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If avic is not enabled, avic_vm_init() does nothing and returns early. However, avic_vm_destroy() still tries to destroy what hasn't been created. The only bad consequence of this now is that avic_vm_destroy() uses svm_vm_data_hash_lock that hasn't been initialized (and is not meant to be used at all if avic is not enabled). Return early from avic_vm_destroy() if avic is not enabled. It has nothing to destroy. Signed-off-by: Dmitry Vyukov Cc: Joerg Roedel Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: David Hildenbrand Cc: kvm@vger.kernel.org Cc: syzkaller@googlegroups.com Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1efe2c62b3f..5fba70646c32 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm) unsigned long flags; struct kvm_arch *vm_data = &kvm->arch; + if (!avic) + return; + avic_free_vm_id(vm_data->avic_vm_id); if (vm_data->avic_logical_id_table_page) -- cgit From 6d1b3ad2cd87150fc89bad2331beab173a8ad24d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 12 Mar 2017 00:53:52 -0800 Subject: KVM: nVMX: don't reset kvm mmu twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm mmu is reset once successfully loading CR3 as part of emulating vmentry in nested_vmx_load_cr3(). We should not reset kvm mmu twice. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e7ec88961b1a..c66436530a93 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10287,8 +10287,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return 1; - kvm_mmu_reset_context(vcpu); - if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; -- cgit From ad4830051aac0b967add82ac168f49edf11f01a0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 21 Mar 2017 18:16:47 -0500 Subject: x86/platform/uv: Fix calculation of Global Physical Address The calculation of the global physical address (GPA) on UV4 is incorrect. The gnode_extra/upper global offset should only be applied for fixed address space systems (UV1..3). Tested-by: John Estabrook Signed-off-by: Mike Travis Cc: Dimitri Sivanich Cc: John Estabrook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170321231646.667689538@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 8 +++++--- arch/x86/kernel/apic/x2apic_uv_x.c | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 72e8300b1e8a..9cffb44a3cf5 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -485,15 +485,17 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; - paddr |= uv_hub_info->gnode_upper; - if (m_val) + + if (m_val) { + paddr |= uv_hub_info->gnode_upper; paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); - else + } else { paddr |= uv_soc_phys_ram_to_nasid(paddr) << uv_hub_info->gpa_shift; + } return paddr; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e9f8f8cdd570..86f20cc0a65e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1105,7 +1105,8 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hi) node_id.v = uv_read_local_mmr(UVH_NODE_ID); uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val); hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; - hi->gnode_upper = (unsigned long)hi->gnode_extra << mn.m_val; + if (mn.m_val) + hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val; if (uv_gp_table) { hi->global_mmr_base = uv_gp_table->mmr_base; -- cgit From 26a37ab319a26d330bab298770d692bb9c852aff Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 Mar 2017 14:40:30 -0700 Subject: x86/mce: Fix copy/paste error in exception table entries Back in commit: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") ... I made a copy/paste error setting up the exception table entries and ended up with two for label .L_cache_w3 and none for .L_cache_w2. This means that if we take a machine check on: .L_cache_w2: movq 2*8(%rsi), %r10 then we don't have an exception table entry for this instruction and we can't recover. Fix: s/3/2/ Signed-off-by: Tony Luck Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") Link: http://lkml.kernel.org/r/1490046030-25862-1-git-send-email-tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 779782f58324..9a53a06e5a3e 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) -- cgit From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 9 ++++++--- arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/tsc.c | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2aa1ad194db2..580b60f5ac83 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { struct cyc2ns_data *data; + u64 offset; userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; @@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event, !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable()) + if (!using_native_sched_clock() || !sched_clock_stable()) return; data = cyc2ns_read_begin(); + offset = data->cyc2ns_offset + __sched_clock_offset; + /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. @@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; - userpg->time_offset = data->cyc2ns_offset - now; + userpg->time_offset = offset - now; /* * cap_user_time_zero doesn't make sense when we're using a different @@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + userpg->time_zero = offset; } cyc2ns_read_end(data); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index a04eabd43d06..27e9f9d769b8 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; +extern bool using_native_sched_clock(void); + /* * We use the full linear equation: f(x) = a + b*x, in order to allow * a continuous function in the face of dynamic freq changes. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c73a7f9e881a..714dfba6a1e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -328,7 +328,7 @@ unsigned long long sched_clock(void) return paravirt_sched_clock(); } -static inline bool using_native_sched_clock(void) +bool using_native_sched_clock(void) { return pv_time_ops.sched_clock == native_sched_clock; } @@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); -static inline bool using_native_sched_clock(void) { return true; } +bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) -- cgit From 950712eb8ef03e4a62ac5b3067efde7ec2f8a91e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:18 +0800 Subject: KVM: x86: check existance before destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly used for split irqchip mode. In that case, these two things are not inited at all, so no need to release. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/i8259.c | 3 +++ arch/x86/kvm/ioapic.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 73ea24d4f119..047b17a26269 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -657,6 +657,9 @@ void kvm_pic_destroy(struct kvm *kvm) { struct kvm_pic *vpic = kvm->arch.vpic; + if (!vpic) + return; + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d2..289270a6aecb 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -635,6 +635,9 @@ void kvm_ioapic_destroy(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + if (!ioapic) + return; + cancel_delayed_work_sync(&ioapic->eoi_inject); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); kvm->arch.vioapic = NULL; -- cgit From c761159cf81b8641290f7559a8d8e30f6ab92669 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:19 +0800 Subject: KVM: x86: use pic/ioapic destructor when destroy vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have specific destructors for pic/ioapic, we'd better use them when destroying the VM as well. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1faf620a6fdc..d30ff491ecc7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8153,8 +8153,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); kvm_iommu_unmap_guest(kvm); - kfree(kvm->arch.vpic); - kfree(kvm->arch.vioapic); + kvm_pic_destroy(kvm); + kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); -- cgit From fb6c8198431311027c3434d4e94ab8bc040f7aea Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 16 Mar 2017 13:53:59 -0700 Subject: kvm: vmx: Flush TLB when the APIC-access address changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting from the Intel SDM, volume 3, section 28.3.3.4: Guidelines for Use of the INVEPT Instruction: If EPT was in use on a logical processor at one time with EPTP X, it is recommended that software use the INVEPT instruction with the "single-context" INVEPT type and with EPTP X in the INVEPT descriptor before a VM entry on the same logical processor that enables EPT with EPTP X and either (a) the "virtualize APIC accesses" VM-execution control was changed from 0 to 1; or (b) the value of the APIC-access address was changed. In the nested case, the burden falls on L1, unless L0 enables EPT in vmcs02 when L1 doesn't enable EPT in vmcs12. Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66436530a93..e2f608283a5a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4024,6 +4024,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); } +static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) +{ + if (enable_ept) + vmx_flush_tlb(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -8548,6 +8554,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb_ept_only(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -8573,8 +8580,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) */ if (!is_guest_mode(vcpu) || !nested_cpu_has2(get_vmcs12(&vmx->vcpu), - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb_ept_only(vcpu); + } } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -10256,6 +10265,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); + } else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* @@ -11055,6 +11067,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.change_vmcs01_virtual_x2apic_mode = false; vmx_set_virtual_x2apic_mode(vcpu, vcpu->arch.apic_base & X2APIC_ENABLE); + } else if (!nested_cpu_has_ept(vmcs12) && + nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ -- cgit From 24dccf83a121b8a4ad5c2ad383a8184ef6c266ee Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:55 -0700 Subject: KVM: x86: correct async page present tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After async pf setup successfully, there is a broadcast wakeup w/ special token 0xffffffff which tells vCPU that it should wake up all processes waiting for APFs though there is no real process waiting at the moment. The async page present tracepoint print prematurely and fails to catch the special token setup. This patch fixes it by moving the async page present tracepoint after the special token setup. Before patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0x0 gva 0x0 After patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0xffffffff gva 0x0 Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d30ff491ecc7..64697fe475c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8566,11 +8566,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_ready(work->arch.token, work->gva); if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); + trace_kvm_async_pf_ready(work->arch.token, work->gva); if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { -- cgit From 63cb6d5f004ca44f9b8e562b6dd191f717a4960e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:53 -0700 Subject: KVM: nVMX: Fix nested VPID vmx exec control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running kvm-unit-tests/vmx.flat on L0 w/ vpid disabled. Test suite: VPID Unhandled exception 6 #UD at ip 00000000004051a6 error_code=0000 rflags=00010047 cs=00000008 rax=0000000000000000 rcx=0000000000000001 rdx=0000000000000047 rbx=0000000000402f79 rbp=0000000000456240 rsi=0000000000000001 rdi=0000000000000000 r8=000000000000000a r9=00000000000003f8 r10=0000000080010011 r11=0000000000000000 r12=0000000000000003 r13=0000000000000708 r14=0000000000000000 r15=0000000000000000 cr0=0000000080010031 cr2=0000000000000000 cr3=0000000007fff000 cr4=0000000000002020 cr8=0000000000000000 STACK: @4051a6 40523e 400f7f 402059 40028f We should hide and forbid VPID in L1 if it is disabled on L0. However, nested VPID enable bit is set unconditionally during setup nested vmx exec controls though VPID is not exposed through nested VMX capablity. This patch fixes it by don't set nested VPID enable bit if it is disabled on L0. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: stable@vger.kernel.org Fixes: 5c614b3583e (KVM: nVMX: nested VPID emulation) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e2f608283a5a..5a82766e4b07 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2753,7 +2753,6 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING | @@ -2781,10 +2780,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * though it is treated as global context. The alternative is * not failing the single-context invvpid, and it is worse. */ - if (enable_vpid) + if (enable_vpid) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_VPID; vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; - else + } else vmx->nested.nested_vmx_vpid_caps = 0; if (enable_unrestricted_guest) -- cgit From 08d839c4b134b8328ec42f2157a9ca4b93227c03 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 23 Mar 2017 05:30:08 -0700 Subject: KVM: VMX: Fix enable VPID conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running L2 on L1, and disable VPID on L0 if w/o commit "KVM: nVMX: Fix nested VPID vmx exec control", the L2 crash as below: KVM: entry failed, hardware error 0x7 EAX=00000000 EBX=00000000 ECX=00000000 EDX=000306c3 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Reference SDM 30.3 INVVPID: Protected Mode Exceptions - #UD - If not in VMX operation. - If the logical processor does not support VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=0). - If the logical processor supports VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=1) but does not support the INVVPID instruction (IA32_VMX_EPT_VPID_CAP[32]=0). So we should check both VPID enable bit in vmx exec control and INVVPID support bit in vmx capability MSRs to enable VPID. This patch adds the guarantee to not enable VPID if either INVVPID or single-context/all-context invalidation is not exposed in vmx capability MSRs. Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5a82766e4b07..cd1ba62878f3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1239,6 +1239,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_invvpid(void) +{ + return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; +} + static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -6524,8 +6529,10 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (!cpu_has_vmx_vpid()) + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) -- cgit From a46f60d76004965e5669dbf3fc21ef3bc3632eb4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 Mar 2017 12:59:52 +0800 Subject: x86/mm/KASLR: Exclude EFI region from KASLR VA space randomization Currently KASLR is enabled on three regions: the direct mapping of physical memory, vamlloc and vmemmap. However the EFI region is also mistakenly included for VA space randomization because of misusing EFI_VA_START macro and assuming EFI_VA_START < EFI_VA_END. (This breaks kexec and possibly other things that rely on stable addresses.) The EFI region is reserved for EFI runtime services virtual mapping which should not be included in KASLR ranges. In Documentation/x86/x86_64/mm.txt, we can see: ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space EFI uses the space from -4G to -64G thus EFI_VA_START > EFI_VA_END, Here EFI_VA_START = -4G, and EFI_VA_END = -64G. Changing EFI_VA_START to EFI_VA_END in mm/kaslr.c fixes this problem. Signed-off-by: Baoquan He Reviewed-by: Bhupesh Sharma Acked-by: Dave Young Acked-by: Thomas Garnier Cc: #4.8+ Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1490331592-31860-1-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kaslr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 887e57182716..aed206475aa7 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; #if defined(CONFIG_X86_ESPFIX64) static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; #elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_START; +static const unsigned long vaddr_end = EFI_VA_END; #else static const unsigned long vaddr_end = __START_KERNEL_map; #endif @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) */ BUILD_BUG_ON(vaddr_start >= vaddr_end); BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_START); + vaddr_end >= EFI_VA_END); BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); -- cgit From cc66afea58f858ff6da7f79b8a595a67bbb4f9a9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Mar 2017 11:32:59 +0200 Subject: x86/mce: Don't print MCEs when mcelog is active Since: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers") all MCEs are printed even when mcelog is running. Fix the regression to not print to dmesg when mcelog is running as it is a consumer too. Signed-off-by: Andi Kleen [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org # 4.10.. Fixes: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers") Link: http://lkml.kernel.org/r/20170327093304.10683-2-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8e9725c607ea..5accfbdee3f0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -54,6 +54,8 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); +static int mce_chrdev_open_count; /* #times opened */ + #define mce_log_get_idx_check(p) \ ({ \ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ @@ -598,6 +600,10 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, if (atomic_read(&num_notifiers) > 2) return NOTIFY_DONE; + /* Don't print when mcelog is running */ + if (mce_chrdev_open_count > 0) + return NOTIFY_DONE; + __print_mce(m); return NOTIFY_DONE; @@ -1828,7 +1834,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c) */ static DEFINE_SPINLOCK(mce_chrdev_state_lock); -static int mce_chrdev_open_count; /* #times opened */ static int mce_chrdev_open_exclu; /* already open exclusive? */ static int mce_chrdev_open(struct inode *inode, struct file *file) -- cgit From 7ad658b693536741c37b16aeb07840a2ce75f5b9 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 07:18:08 +0100 Subject: KVM: nVMX: fix nested EPT detection The nested_ept_enabled flag introduced in commit 7ca29de2136 was not computed correctly. We are interested only in L1's EPT state, not the the combined L0+L1 value. In particular, if L0 uses EPT but L1 does not, nested_ept_enabled must be false to make sure that PDPSTRs are loaded based on CR3 as usual, because the special case described in 26.3.2.4 Loading Page-Directory- Pointer-Table Entries does not apply. Fixes: 7ca29de21362 ("KVM: nVMX: fix CR3 load if L2 uses PAE paging and EPT") Cc: qemu-stable@nongnu.org Reported-by: Wanpeng Li Reviewed-by: David Hildenbrand Signed-off-by: Ladi Prosek Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cd1ba62878f3..2ee00dbbbd51 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9992,7 +9992,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; - bool nested_ept_enabled = false; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -10139,8 +10138,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_intr_status); } - nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; - /* * Write an illegal value to APIC_ACCESS_ADDR. Later, * nested_get_vmcs12_pages will either fix it up or @@ -10303,7 +10300,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_efer(vcpu, vcpu->arch.efer); /* Shadow page tables on either EPT or shadow page tables. */ - if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, + if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) return 1; -- cgit From 2beb6dad2e8f95d710159d5befb390e4f62ab5cf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Mar 2017 17:53:50 +0200 Subject: KVM: x86: cleanup the page tracking SRCU instance SRCU uses a delayed work item. Skip cleaning it up, and the result is use-after-free in the work item callbacks. Reported-by: Dmitry Vyukov Suggested-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 0eb05bf290cfe8610d9680b49abef37febd1c38a Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_page_track.h | 1 + arch/x86/kvm/page_track.c | 8 ++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index d74747b031ec..c4eda791f877 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node { }; void kvm_page_track_init(struct kvm *kvm); +void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 37942e419c32..60168cdd0546 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); } +void kvm_page_track_cleanup(struct kvm *kvm) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + cleanup_srcu_struct(&head->track_srcu); +} + void kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64697fe475c3..ccbd45ecd41a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8158,6 +8158,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); + kvm_page_track_cleanup(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, -- cgit From 3f135e57a4f76d24ae8d8a490314331f0ced40c5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 14:31:33 -0500 Subject: x86/build: Mostly disable '-maccumulate-outgoing-args' The GCC '-maccumulate-outgoing-args' flag is enabled for most configs, mostly because of issues which are no longer relevant. For most configs, and with most recent versions of GCC, it's no longer needed. Clarify which cases need it, and only enable it for those cases. Also produce a compile-time error for the ftrace graph + mcount + '-Os' case, which will otherwise cause runtime failures. The main benefit of '-maccumulate-outgoing-args' is that it prevents an ugly prologue for functions which have aligned stacks. But removing the option also has some benefits: more readable argument saves, smaller text size, and (presumably) slightly improved performance. Here are the object size savings for 32-bit and 64-bit defconfig kernels: text data bss dec hex filename 10006710 3543328 1773568 15323606 e9d1d6 vmlinux.x86-32.before 9706358 3547424 1773568 15027350 e54c96 vmlinux.x86-32.after text data bss dec hex filename 10652105 4537576 843776 16033457 f4a6b1 vmlinux.x86-64.before 10639629 4537576 843776 16020981 f475f5 vmlinux.x86-64.after That comes out to a 3% text size improvement on x86-32 and a 0.1% text size improvement on x86-64. Signed-off-by: Josh Poimboeuf Cc: Andrew Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170316193133.zrj6gug53766m6nn@treble Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 35 +++++++++++++++++++++++++++++++---- arch/x86/Makefile_32.cpu | 18 ------------------ arch/x86/kernel/ftrace.c | 6 ++++++ 3 files changed, 37 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a360..a94a4d10f2df 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -120,10 +120,6 @@ else # -funit-at-a-time shrinks the kernel .text considerably # unfortunately it makes reading oopses harder. KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) - - # this works around some issues with generating unwind tables in older gccs - # newer gccs do it by default - KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) endif ifdef CONFIG_X86_X32 @@ -147,6 +143,37 @@ ifeq ($(CONFIG_KMEMCHECK),y) KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) endif +# +# If the function graph tracer is used with mcount instead of fentry, +# '-maccumulate-outgoing-args' is needed to prevent a GCC bug +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109) +# +ifdef CONFIG_FUNCTION_GRAPH_TRACER + ifndef CONFIG_HAVE_FENTRY + ACCUMULATE_OUTGOING_ARGS := 1 + else + ifeq ($(call cc-option-yn, -mfentry), n) + ACCUMULATE_OUTGOING_ARGS := 1 + endif + endif +endif + +# +# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a +# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way +# to test for this bug at compile-time because the test case needs to execute, +# which is a no-go for cross compilers. So check the GCC version instead. +# +ifdef CONFIG_JUMP_LABEL + ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1) + ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1) + endif +endif + +ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) + KBUILD_CFLAGS += -maccumulate-outgoing-args +endif + # Stackpointer is addressed different for 32 bit and 64 bit x86 sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 6647ed49c66c..a45eb15b7cf2 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -45,24 +45,6 @@ cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) -# Work around the pentium-mmx code generator madness of gcc4.4.x which -# does stack alignment by generating horrible code _before_ the mcount -# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph -# tracer assumptions. For i686, generic, core2 this is set by the -# compiler anyway -ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -# Work around to a bug with asm goto with first implementations of it -# in gcc causing gcc to mess up the push and pop of the stack in some -# uses of asm goto. -ifeq ($(CONFIG_JUMP_LABEL), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) - # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. ifneq ($(CONFIG_X86_P6_NOP),y) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8f3d9cf26ff9..cbd73eb42170 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,12 @@ #include #include +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && \ + !defined(CC_USING_FENTRY) && \ + !defined(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) +# error The following combination is not supported: ((compiler missing -mfentry) || (CONFIG_X86_32 and !CONFIG_DYNAMIC_FTRACE)) && CONFIG_FUNCTION_GRAPH_TRACER && CONFIG_CC_OPTIMIZE_FOR_SIZE +#endif + #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) -- cgit From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 30 Mar 2017 13:17:14 +0200 Subject: x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. However, MCA bank 3 is defined on Fam17h systems and can be accessed using legacy MSRs. Without a name we get a stack trace on Fam17h systems when trying to register sysfs files for bank 3 on kernels that don't recognize Scalable MCA. Call MCA bank 3 "decode_unit" since this is what it represents on Fam17h. This will allow kernels without SMCA support to see this bank on Fam17h+ and prevent the stack trace. This will not affect older systems since this bank is reserved on them, i.e. it'll be ignored. Tested on AMD Fam15h and Fam17h systems. WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal kobject: (ffff88085bb256c0): attempted to be registered with empty name! ... Call Trace: kobject_add_internal kobject_add kobject_create_and_add threshold_create_device threshold_init_device Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 524cc5780a77..6e4a047e4b68 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -60,7 +60,7 @@ static const char * const th_names[] = { "load_store", "insn_fetch", "combined_unit", - "", + "decode_unit", "northbridge", "execution_unit", }; -- cgit From 6b1cc946ddfcfc17d66c7d02eafa14deeb183437 Mon Sep 17 00:00:00 2001 From: Zhengyi Shen Date: Wed, 29 Mar 2017 15:00:20 +0800 Subject: x86/boot: Include missing header file Sparse complains about missing forward declarations: arch/x86/boot/compressed/error.c:8:6: warning: symbol 'warn' was not declared. Should it be static? arch/x86/boot/compressed/error.c:15:6: warning: symbol 'error' was not declared. Should it be static? Include the missing header file. Signed-off-by: Zhengyi Shen Acked-by: Kess Cook Link: http://lkml.kernel.org/r/1490770820-24472-1-git-send-email-shenzhengyi@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/error.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index 6248740b68b5..31922023de49 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -4,6 +4,7 @@ * memcpy() and memmove() are defined for the compressed boot environment. */ #include "misc.h" +#include "error.h" void warn(char *m) { -- cgit From 13a6798e4a03096b11bf402a063786a7be55d426 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 31 Mar 2017 15:12:12 -0700 Subject: kasan: do not sanitize kexec purgatory Fixes this: kexec: Undefined symbol: __asan_load8_noabort kexec-bzImage64: Loading purgatory failed Link: http://lkml.kernel.org/r/1489672155.4458.7.camel@gmx.de Signed-off-by: Mike Galbraith Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 555b9fa0ad43..7dbdb780264d 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +KASAN_SANITIZE := n KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That -- cgit From ab007cc94ff9d82f5a8db8363b3becbd946e58cf Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Fri, 31 Mar 2017 10:19:26 +0200 Subject: KVM: nVMX: do not leak PML full vmexit to L1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PML feature is not exposed to guests so we should not be forwarding the vmexit either. This commit fixes BSOD 0x20001 (HYPERVISOR_ERROR) when running Hyper-V enabled Windows Server 2016 in L1 on hardware that supports PML. Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX") Signed-off-by: Ladi Prosek Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2ee00dbbbd51..605183291069 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); case EXIT_REASON_PREEMPTION_TIMER: return false; + case EXIT_REASON_PML_FULL: + /* We don't expose PML support to L1. */ + return false; default: return true; } -- cgit From 1fb883bb827ee8efc1cc9ea0154f953f8a219d38 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 4 Apr 2017 14:18:53 +0200 Subject: KVM: nVMX: initialize PML fields in vmcs02 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2 was running with uninitialized PML fields which led to incomplete dirty bitmap logging. This manifested as all kinds of subtle erratic behavior of the nested guest. Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX") Signed-off-by: Ladi Prosek Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 605183291069..259e9b28ccf8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10270,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } + if (enable_pml) { + /* + * Conceptually we want to copy the PML address and index from + * vmcs01 here, and then back to vmcs01 on nested vmexit. But, + * since we always flush the log on each vmexit, this happens + * to be equivalent to simply resetting the fields in vmcs02. + */ + ASSERT(vmx->pml_pg); + vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + } + if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); -- cgit