From b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 May 2022 14:51:22 +0000 Subject: KVM: x86/mmu: Update number of zapped pages even if page list is stable When zapping obsolete pages, update the running count of zapped pages regardless of whether or not the list has become unstable due to zapping a shadow page with its own child shadow pages. If the VM is backed by mostly 4kb pages, KVM can zap an absurd number of SPTEs without bumping the batch count and thus without yielding. In the worst case scenario, this can cause a soft lokcup. watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [dirty_log_perf_:13020] RIP: 0010:workingset_activation+0x19/0x130 mark_page_accessed+0x266/0x2e0 kvm_set_pfn_accessed+0x31/0x40 mmu_spte_clear_track_bits+0x136/0x1c0 drop_spte+0x1a/0xc0 mmu_page_zap_pte+0xef/0x120 __kvm_mmu_prepare_zap_page+0x205/0x5e0 kvm_mmu_zap_all_fast+0xd7/0x190 kvm_mmu_invalidate_zap_pages_in_memslot+0xe/0x10 kvm_page_track_flush_slot+0x5c/0x80 kvm_arch_flush_shadow_memslot+0xe/0x10 kvm_set_memslot+0x1a8/0x5d0 __kvm_set_memory_region+0x337/0x590 kvm_vm_ioctl+0xb08/0x1040 Fixes: fbb158cb88b6 ("KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""") Reported-by: David Matlack Reviewed-by: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220511145122.3133334-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 311e4e1d7870..56ebc4fb7f91 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5665,6 +5665,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; + bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5696,11 +5697,12 @@ restart: goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; + unstable = __kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped); + batch += nr_zapped; + + if (unstable) goto restart; - } } /* -- cgit From 5163373af195f10e0d99a8de3465c4ed36bdc337 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 May 2022 22:14:24 +0100 Subject: KVM: arm64: vgic-v3: Consistently populate ID_AA64PFR0_EL1.GIC When adding support for the slightly wonky Apple M1, we had to populate ID_AA64PFR0_EL1.GIC==1 to present something to the guest, as the HW itself doesn't advertise the feature. However, we gated this on the in-kernel irqchip being created. This causes some trouble for QEMU, which snapshots the state of the registers before creating a virtual GIC, and then tries to restore these registers once the GIC has been created. Obviously, between the two stages, ID_AA64PFR0_EL1.GIC has changed value, and the write fails. The fix is to actually emulate the HW, and always populate the field if the HW is capable of it. Fixes: 562e530fd770 ("KVM: arm64: Force ID_AA64PFR0_EL1.GIC=1 when exposing a virtual GICv3") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Reported-by: Peter Maydell Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20220503211424.3375263-1-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b45c040cc27..adf408c09cdb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1123,8 +1123,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); - if (irqchip_in_kernel(vcpu->kvm) && - vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + if (kvm_vgic_global_state.type == VGIC_V3) { val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1); } -- cgit From 2e40316753ee552fb598e8da8ca0d20a04e67453 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 13 May 2022 09:26:07 +0000 Subject: KVM: arm64: Don't hypercall before EL2 init Will reported the following splat when running with Protected KVM enabled: [ 2.427181] ------------[ cut here ]------------ [ 2.427668] WARNING: CPU: 3 PID: 1 at arch/arm64/kvm/mmu.c:489 __create_hyp_private_mapping+0x118/0x1ac [ 2.428424] Modules linked in: [ 2.429040] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.18.0-rc2-00084-g8635adc4efc7 #1 [ 2.429589] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2.430286] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.430734] pc : __create_hyp_private_mapping+0x118/0x1ac [ 2.431091] lr : create_hyp_exec_mappings+0x40/0x80 [ 2.431377] sp : ffff80000803baf0 [ 2.431597] x29: ffff80000803bb00 x28: 0000000000000000 x27: 0000000000000000 [ 2.432156] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 [ 2.432561] x23: ffffcd96c343b000 x22: 0000000000000000 x21: ffff80000803bb40 [ 2.433004] x20: 0000000000000004 x19: 0000000000001800 x18: 0000000000000000 [ 2.433343] x17: 0003e68cf7efdd70 x16: 0000000000000004 x15: fffffc81f602a2c8 [ 2.434053] x14: ffffdf8380000000 x13: ffffcd9573200000 x12: ffffcd96c343b000 [ 2.434401] x11: 0000000000000004 x10: ffffcd96c1738000 x9 : 0000000000000004 [ 2.434812] x8 : ffff80000803bb40 x7 : 7f7f7f7f7f7f7f7f x6 : 544f422effff306b [ 2.435136] x5 : 000000008020001e x4 : ffff207d80a88c00 x3 : 0000000000000005 [ 2.435480] x2 : 0000000000001800 x1 : 000000014f4ab800 x0 : 000000000badca11 [ 2.436149] Call trace: [ 2.436600] __create_hyp_private_mapping+0x118/0x1ac [ 2.437576] create_hyp_exec_mappings+0x40/0x80 [ 2.438180] kvm_init_vector_slots+0x180/0x194 [ 2.458941] kvm_arch_init+0x80/0x274 [ 2.459220] kvm_init+0x48/0x354 [ 2.459416] arm_init+0x20/0x2c [ 2.459601] do_one_initcall+0xbc/0x238 [ 2.459809] do_initcall_level+0x94/0xb4 [ 2.460043] do_initcalls+0x54/0x94 [ 2.460228] do_basic_setup+0x1c/0x28 [ 2.460407] kernel_init_freeable+0x110/0x178 [ 2.460610] kernel_init+0x20/0x1a0 [ 2.460817] ret_from_fork+0x10/0x20 [ 2.461274] ---[ end trace 0000000000000000 ]--- Indeed, the Protected KVM mode promotes __create_hyp_private_mapping() to a hypercall as EL1 no longer has access to the hypervisor's stage-1 page-table. However, the call from kvm_init_vector_slots() happens after pKVM has been initialized on the primary CPU, but before it has been initialized on secondaries. As such, if the KVM initcall procedure is migrated from one CPU to another in this window, the hypercall may end up running on a CPU for which EL2 has not been initialized. Fortunately, the pKVM hypervisor doesn't rely on the host to re-map the vectors in the private range, so the hypercall in question is in fact superfluous. Skip it when pKVM is enabled. Reported-by: Will Deacon Signed-off-by: Quentin Perret [maz: simplified the checks slightly] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513092607.35233-1-qperret@google.com --- arch/arm64/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..a66d83540c15 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1436,7 +1436,8 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (kvm_system_needs_idmapped_vectors() && !has_vhe()) { + if (kvm_system_needs_idmapped_vectors() && + !is_protected_kvm_enabled()) { err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) -- cgit From 4ac19ead0dfbabd8e0bfc731f507cfb0b95d6c99 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Tue, 17 May 2022 05:12:36 +0000 Subject: kvm: x86/pmu: Fix the compare function used by the pmu event filter When returning from the compare function the u64 is truncated to an int. This results in a loss of the high nybble[1] in the event select and its sign if that nybble is in use. Switch from using a result that can end up being truncated to a result that can only be: 1, 0, -1. [1] bits 35:32 in the event select register and bits 11:8 in the event select. Fixes: 7ff775aca48ad ("KVM: x86/pmu: Use binary search to check filtered events") Signed-off-by: Aaron Lewis Reviewed-by: Sean Christopherson Message-Id: <20220517051238.2566934-1-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index eca39f56c231..0604bc29f0b8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -171,9 +171,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) return true; } -static int cmp_u64(const void *a, const void *b) +static int cmp_u64(const void *pa, const void *pb) { - return *(__u64 *)a - *(__u64 *)b; + u64 a = *(u64 *)pa; + u64 b = *(u64 *)pb; + + return (a > b) - (a < b); } void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) -- cgit From 04baa2233d55e85e0f0f5dfe0401ecb027da9a0e Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Tue, 17 May 2022 05:12:37 +0000 Subject: selftests: kvm/x86: Add the helper function create_pmu_event_filter Add a helper function that creates a pmu event filter given an event list. Currently, a pmu event filter can only be created with the same hard coded event list. Add a way to create one given a different event list. Also, rename make_pmu_event_filter to alloc_pmu_event_filter to clarify it's purpose given the introduction of create_pmu_event_filter. No functional changes intended. Signed-off-by: Aaron Lewis Message-Id: <20220517051238.2566934-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/pmu_event_filter_test.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 0d06ffa95d9d..30c1a5804210 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm) return success; } -static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) +static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) { struct kvm_pmu_event_filter *f; int size = sizeof(*f) + nevents * sizeof(f->events[0]); @@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) return f; } -static struct kvm_pmu_event_filter *event_filter(uint32_t action) + +static struct kvm_pmu_event_filter * +create_pmu_event_filter(const uint64_t event_list[], + int nevents, uint32_t action) { struct kvm_pmu_event_filter *f; int i; - f = make_pmu_event_filter(ARRAY_SIZE(event_list)); + f = alloc_pmu_event_filter(nevents); f->action = action; - for (i = 0; i < ARRAY_SIZE(event_list); i++) + for (i = 0; i < nevents; i++) f->events[i] = event_list[i]; return f; } +static struct kvm_pmu_event_filter *event_filter(uint32_t action) +{ + return create_pmu_event_filter(event_list, + ARRAY_SIZE(event_list), + action); +} + /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. -- cgit From c41ef29cc1d4fa4ac5f1bb8e6ab57bf6f02cf878 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Tue, 17 May 2022 05:12:38 +0000 Subject: selftests: kvm/x86: Verify the pmu event filter matches the correct event Add a test to demonstrate that when the guest programs an event select it is matched correctly in the pmu event filter and not inadvertently filtered. This could happen on AMD if the high nybble[1] in the event select gets truncated away only leaving the bottom byte[2] left for matching. This is a contrived example used for the convenience of demonstrating this issue, however, this can be applied to event selects 0x28A (OC Mode Switch) and 0x08A (L1 BTB Correction), where 0x08A could end up being denied when the event select was only set up to deny 0x28A. [1] bits 35:32 in the event select register and bits 11:8 in the event select. [2] bits 7:0 in the event select register and bits 7:0 in the event select. Signed-off-by: Aaron Lewis Message-Id: <20220517051238.2566934-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/pmu_event_filter_test.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 30c1a5804210..93d77574b255 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -281,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm, return run_vm_to_sync(vm); } +static void test_amd_deny_list(struct kvm_vm *vm) +{ + uint64_t event = EVENT(0x1C2, 0); + struct kvm_pmu_event_filter *f; + uint64_t count; + + f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY); + count = test_with_filter(vm, f); + + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + static void test_member_deny_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); @@ -463,6 +479,9 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } + if (use_amd_pmu()) + test_amd_deny_list(vm); + test_without_filter(vm); test_member_deny_list(vm); test_member_allow_list(vm); -- cgit From e332b55fe79cca72451fe0b797219bd9fe6b9434 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 19 May 2022 01:49:13 -0700 Subject: KVM: eventfd: Fix false positive RCU usage warning The splat below can be seen when running kvm-unit-test: ============================= WARNING: suspicious RCU usage 5.18.0-rc7 #5 Tainted: G IOE ----------------------------- /home/kernel/linux/arch/x86/kvm/../../../virt/kvm/eventfd.c:80 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 4 locks held by qemu-system-x86/35124: #0: ffff9725391d80b8 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x77/0x710 [kvm] #1: ffffbd25cfb2a0b8 (&kvm->srcu){....}-{0:0}, at: vcpu_enter_guest+0xdeb/0x1900 [kvm] #2: ffffbd25cfb2b920 (&kvm->irq_srcu){....}-{0:0}, at: kvm_hv_notify_acked_sint+0x79/0x1e0 [kvm] #3: ffffbd25cfb2b920 (&kvm->irq_srcu){....}-{0:0}, at: irqfd_resampler_ack+0x5/0x110 [kvm] stack backtrace: CPU: 2 PID: 35124 Comm: qemu-system-x86 Tainted: G IOE 5.18.0-rc7 #5 Call Trace: dump_stack_lvl+0x6c/0x9b irqfd_resampler_ack+0xfd/0x110 [kvm] kvm_notify_acked_gsi+0x32/0x90 [kvm] kvm_hv_notify_acked_sint+0xc5/0x1e0 [kvm] kvm_hv_set_msr_common+0xec1/0x1160 [kvm] kvm_set_msr_common+0x7c3/0xf60 [kvm] vmx_set_msr+0x394/0x1240 [kvm_intel] kvm_set_msr_ignored_check+0x86/0x200 [kvm] kvm_emulate_wrmsr+0x4f/0x1f0 [kvm] vmx_handle_exit+0x6fb/0x7e0 [kvm_intel] vcpu_enter_guest+0xe5a/0x1900 [kvm] kvm_arch_vcpu_ioctl_run+0x16e/0xac0 [kvm] kvm_vcpu_ioctl+0x279/0x710 [kvm] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae resampler-list is protected by irq_srcu (see kvm_irqfd_assign), so fix the false positive by using list_for_each_entry_srcu(). Signed-off-by: Wanpeng Li Message-Id: <1652950153-12489-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 59b1dd4a549e..2a3ed401ce46 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -77,7 +77,8 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) idx = srcu_read_lock(&kvm->irq_srcu); - list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) + list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, + srcu_read_lock_held(&kvm->irq_srcu)) eventfd_signal(irqfd->resamplefd, 1); srcu_read_unlock(&kvm->irq_srcu, idx); -- cgit From c87661f855c3f2023e40ddc364002601ee234367 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 May 2022 00:38:42 +0000 Subject: KVM: Free new dirty bitmap if creating a new memslot fails Fix a goof in kvm_prepare_memory_region() where KVM fails to free the new memslot's dirty bitmap during a CREATE action if kvm_arch_prepare_memory_region() fails. The logic is supposed to detect if the bitmap was allocated and thus needs to be freed, versus if the bitmap was inherited from the old memslot and thus needs to be kept. If there is no old memslot, then obviously the bitmap can't have been inherited The bug was exposed by commit 86931ff7207b ("KVM: x86/mmu: Do not create SPTEs for GFNs that exceed host.MAXPHYADDR"), which made it trivally easy for syzkaller to trigger failure during kvm_arch_prepare_memory_region(), but the bug can be hit other ways too, e.g. due to -ENOMEM when allocating x86's memslot metadata. The backtrace from kmemleak: __vmalloc_node_range+0xb40/0xbd0 mm/vmalloc.c:3195 __vmalloc_node mm/vmalloc.c:3232 [inline] __vmalloc+0x49/0x50 mm/vmalloc.c:3246 __vmalloc_array mm/util.c:671 [inline] __vcalloc+0x49/0x70 mm/util.c:694 kvm_alloc_dirty_bitmap virt/kvm/kvm_main.c:1319 kvm_prepare_memory_region virt/kvm/kvm_main.c:1551 kvm_set_memslot+0x1bd/0x690 virt/kvm/kvm_main.c:1782 __kvm_set_memory_region+0x689/0x750 virt/kvm/kvm_main.c:1949 kvm_set_memory_region virt/kvm/kvm_main.c:1962 kvm_vm_ioctl_set_memory_region virt/kvm/kvm_main.c:1974 kvm_vm_ioctl+0x377/0x13a0 virt/kvm/kvm_main.c:4528 vfs_ioctl fs/ioctl.c:51 __do_sys_ioctl fs/ioctl.c:870 __se_sys_ioctl fs/ioctl.c:856 __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae And the relevant sequence of KVM events: ioctl(3, KVM_CREATE_VM, 0) = 4 ioctl(4, KVM_SET_USER_MEMORY_REGION, {slot=0, flags=KVM_MEM_LOG_DIRTY_PAGES, guest_phys_addr=0x10000000000000, memory_size=4096, userspace_addr=0x20fe8000} ) = -1 EINVAL (Invalid argument) Fixes: 244893fa2859 ("KVM: Dynamically allocate "new" memslots from the get-go") Cc: stable@vger.kernel.org Reported-by: syzbot+8606b8a9cc97a63f1c87@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20220518003842.1341782-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6d971fb1b08d..5ab12214e18d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1560,7 +1560,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm, r = kvm_arch_prepare_memory_region(kvm, old, new, change); /* Free the bitmap on failure if it was allocated above. */ - if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap) + if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) kvm_destroy_dirty_bitmap(new); return r; -- cgit From ea8c66fe8d8f4f93df941e52120a3512d7bf5128 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 19 May 2022 10:15:04 -0700 Subject: KVM: x86: hyper-v: fix type of valid_bank_mask In kvm_hv_flush_tlb(), valid_bank_mask is declared as unsigned long, but is used as u64, which is wrong for i386, and has been spotted by LKP after applying "KVM: x86: hyper-v: replace bitmap_weight() with hweight64()" https://lore.kernel.org/lkml/20220510154750.212913-12-yury.norov@gmail.com/ But it's wrong even without that patch because now bitmap_weight() dereferences a word after valid_bank_mask on i386. >> include/asm-generic/bitops/const_hweight.h:21:76: warning: right shift count >= width of type +[-Wshift-count-overflow] 21 | #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) | ^~ include/asm-generic/bitops/const_hweight.h:10:16: note: in definition of macro '__const_hweight8' 10 | ((!!((w) & (1ULL << 0))) + \ | ^ include/asm-generic/bitops/const_hweight.h:20:31: note: in expansion of macro '__const_hweight16' 20 | #define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) | ^~~~~~~~~~~~~~~~~ include/asm-generic/bitops/const_hweight.h:21:54: note: in expansion of macro '__const_hweight32' 21 | #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) | ^~~~~~~~~~~~~~~~~ include/asm-generic/bitops/const_hweight.h:29:49: note: in expansion of macro '__const_hweight64' 29 | #define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) | ^~~~~~~~~~~~~~~~~ arch/x86/kvm/hyperv.c:1983:36: note: in expansion of macro 'hweight64' 1983 | if (hc->var_cnt != hweight64(valid_bank_mask)) | ^~~~~~~~~ CC: Borislav Petkov CC: Dave Hansen CC: H. Peter Anvin CC: Ingo Molnar CC: Jim Mattson CC: Joerg Roedel CC: Paolo Bonzini CC: Sean Christopherson CC: Thomas Gleixner CC: Vitaly Kuznetsov CC: Wanpeng Li CC: kvm@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: x86@kernel.org Reported-by: kernel test robot Signed-off-by: Yury Norov Message-Id: <20220519171504.1238724-1-yury.norov@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 46f9dfb60469..a0702b6be3e8 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1914,7 +1914,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct hv_send_ipi_ex send_ipi_ex; struct hv_send_ipi send_ipi; DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); - unsigned long valid_bank_mask; + u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; u32 vector; bool all_cpus; @@ -1956,7 +1956,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; - if (hc->var_cnt != bitmap_weight(&valid_bank_mask, 64)) + if (hc->var_cnt != bitmap_weight((unsigned long *)&valid_bank_mask, 64)) return HV_STATUS_INVALID_HYPERCALL_INPUT; if (all_cpus) -- cgit From 9f46c187e2e680ecd9de7983e4d081c3391acc76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 May 2022 13:48:11 -0400 Subject: KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID With shadow paging enabled, the INVPCID instruction results in a call to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the invlpg callback is not set and the result is a NULL pointer dereference. Fix it trivially by checking for mmu->invlpg before every call. There are other possibilities: - check for CR0.PG, because KVM (like all Intel processors after P5) flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a nop with paging disabled - check for EFER.LMA, because KVM syncs and flushes when switching MMU contexts outside of 64-bit mode All of these are tricky, go for the simple solution. This is CVE-2022-1789. Reported-by: Yongkang Jia Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 56ebc4fb7f91..45e1573f8f1d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) uint i; if (pcid == kvm_get_active_pcid(vcpu)) { - mmu->invlpg(vcpu, gva, mmu->root.hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->root.hpa); tlb_flush = true; } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); tlb_flush = true; } } -- cgit