From 88573389aaa34640b9ecde15622c68e8737d8f8c Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 11 Jul 2022 09:29:51 +0530 Subject: riscv: Fix missing PAGE_PFN_MASK There are a bunch of functions that use the PFN from a page table entry that end up with the svpbmt upper-bits because they are missing the newly introduced PAGE_PFN_MASK which leads to wrong address conversions and then crashes: fix this by adding this mask. Fixes: 100631b48ded ("riscv: Fix accessing pfn bits in PTEs for non-32bit variants") Signed-off-by: Alexandre Ghiti Signed-off-by: Anup Patel --- arch/riscv/include/asm/pgtable-64.h | 12 ++++++------ arch/riscv/include/asm/pgtable.h | 6 +++--- arch/riscv/kvm/mmu.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 5c2aba5efbd0..dc42375c2357 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) static inline unsigned long _pud_pfn(pud_t pud) { - return pud_val(pud) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pud_val(pud)); } static inline pmd_t *pud_pgtable(pud_t pud) @@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) static inline unsigned long _p4d_pfn(p4d_t p4d) { - return p4d_val(p4d) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(p4d_val(p4d)); } static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) - return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } @@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline struct page *p4d_page(p4d_t p4d) { - return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); } #define pud_index(addr) (((addr) >> PUD_SHIFT) & 
(PTRS_PER_PUD - 1)) @@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd) static inline p4d_t *pgd_pgtable(pgd_t pgd) { if (pgtable_l5_enabled) - return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); } @@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd) static inline struct page *pgd_page(pgd_t pgd) { - return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); } #define pgd_page(pgd) pgd_page(pgd) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 1d1be9d9419c..5dbd6610729b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) static inline unsigned long _pgd_pfn(pgd_t pgd) { - return pgd_val(pgd) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pgd_val(pgd)); } static inline struct page *pmd_page(pmd_t pmd) @@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); } -#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT) static inline unsigned long pmd_pfn(pmd_t pmd) { return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); } -#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT) static inline unsigned long pud_pfn(pud_t pud) { diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 1c00695ebee7..9826073fbc67 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) static inline unsigned long gstage_pte_page_vaddr(pte_t pte) { - return (unsigned 
long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) -- cgit From be82abe6a76ba8e76f25312566182b0f13c4fbf9 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 11 Jul 2022 09:36:32 +0530 Subject: RISC-V: KVM: Fix SRCU deadlock caused by kvm_riscv_check_vcpu_requests() The kvm_riscv_check_vcpu_requests() is called with SRCU read lock held and for KVM_REQ_SLEEP request it will block the VCPU without releasing SRCU read lock. This causes KVM ioctls (such as KVM_IOEVENTFD) from other VCPUs of the same Guest/VM to hang/deadlock if there is any synchronize_srcu() or synchronize_srcu_expedited() in the path. To fix the above in kvm_riscv_check_vcpu_requests(), we should do SRCU read unlock before blocking the VCPU and do SRCU read lock after VCPU wakeup. Fixes: cce69aff689e ("RISC-V: KVM: Implement VCPU interrupts and requests handling") Reported-by: Bin Meng Signed-off-by: Anup Patel Reviewed-by: Atish Patra Tested-by: Heinrich Schuchardt Tested-by: Bin Meng Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7f4ad5e4373a..f3455dc013fa 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, (!vcpu->arch.power_off) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); + kvm_vcpu_srcu_read_lock(vcpu); if (vcpu->arch.power_off || vcpu->arch.pause) { /* -- cgit From 99482726452bdf8be9325199022b17fa6d7d58fe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 12 Jul 2022 15:50:09 +0200 Subject: KVM: nVMX: Always enable TSC scaling for L2 when it was enabled for L1 Windows 10/11 guests 
with Hyper-V role (WSL2) enabled are observed to hang upon boot or shortly after when a non-default TSC frequency was set for L1. The issue is observed on a host where TSC scaling is supported. The problem appears to be that Windows doesn't use TSC scaling for its guests even when the feature is advertised and KVM filters SECONDARY_EXEC_TSC_SCALING out when creating L2 controls from L1's. This leads to L2 running with the default frequency (matching host's) while L1 is running with an altered one. Keep SECONDARY_EXEC_TSC_SCALING in secondary exec controls for L2 when it was set for L1. TSC_MULTIPLIER is already correctly computed and written by prepare_vmcs02(). Signed-off-by: Vitaly Kuznetsov Reviewed-by: Maxim Levitsky Message-Id: <20220712135009.952805-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f5cb18e00e78..67d629837672 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_DESC); if (nested_cpu_has(vmcs12, -- cgit From 84e7051c0bc1f2a13101553959b3a9d9a8e24939 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 13 Jul 2022 14:12:41 -0300 Subject: x86/kvm: fix FASTOP_SIZE when return thunks are enabled The return thunk call makes the fastop functions larger, just like IBT does. Consider a 16-byte FASTOP_SIZE when CONFIG_RETHUNK is enabled. 
Otherwise, functions will be incorrectly aligned and when computing their position for differently sized operators, they will be executed in the middle or end of a function, which may as well be an int3, leading to a crash like: [ 36.091116] int3: 0000 [#1] SMP NOPTI [ 36.091119] CPU: 3 PID: 1371 Comm: qemu-system-x86 Not tainted 5.15.0-41-generic #44 [ 36.091120] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 36.091121] RIP: 0010:xaddw_ax_dx+0x9/0x10 [kvm] [ 36.091185] Code: 00 0f bb d0 c3 cc cc cc cc 48 0f bb d0 c3 cc cc cc cc 0f 1f 80 00 00 00 00 0f c0 d0 c3 cc cc cc cc 66 0f c1 d0 c3 cc cc cc cc <0f> 1f 80 00 00 00 00 0f c1 d0 c3 cc cc cc cc 48 0f c1 d0 c3 cc cc [ 36.091186] RSP: 0018:ffffb1f541143c98 EFLAGS: 00000202 [ 36.091188] RAX: 0000000089abcdef RBX: 0000000000000001 RCX: 0000000000000000 [ 36.091188] RDX: 0000000076543210 RSI: ffffffffc073c6d0 RDI: 0000000000000200 [ 36.091189] RBP: ffffb1f541143ca0 R08: ffff9f1803350a70 R09: 0000000000000002 [ 36.091190] R10: ffff9f1803350a70 R11: 0000000000000000 R12: ffff9f1803350a70 [ 36.091190] R13: ffffffffc077fee0 R14: 0000000000000000 R15: 0000000000000000 [ 36.091191] FS: 00007efdfce8d640(0000) GS:ffff9f187dd80000(0000) knlGS:0000000000000000 [ 36.091192] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.091192] CR2: 0000000000000000 CR3: 0000000009b62002 CR4: 0000000000772ee0 [ 36.091195] PKRU: 55555554 [ 36.091195] Call Trace: [ 36.091197] [ 36.091198] ? fastop+0x5a/0xa0 [kvm] [ 36.091222] x86_emulate_insn+0x7b8/0xe90 [kvm] [ 36.091244] x86_emulate_instruction+0x2f4/0x630 [kvm] [ 36.091263] ? kvm_arch_vcpu_load+0x7c/0x230 [kvm] [ 36.091283] ? vmx_prepare_switch_to_host+0xf7/0x190 [kvm_intel] [ 36.091290] complete_emulated_mmio+0x297/0x320 [kvm] [ 36.091310] kvm_arch_vcpu_ioctl_run+0x32f/0x550 [kvm] [ 36.091330] kvm_vcpu_ioctl+0x29e/0x6d0 [kvm] [ 36.091344] ? kvm_vcpu_ioctl+0x120/0x6d0 [kvm] [ 36.091357] ? __fget_files+0x86/0xc0 [ 36.091362] ? 
__fget_files+0x86/0xc0 [ 36.091363] __x64_sys_ioctl+0x92/0xd0 [ 36.091366] do_syscall_64+0x59/0xc0 [ 36.091369] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091370] ? do_syscall_64+0x69/0xc0 [ 36.091371] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091372] ? __x64_sys_writev+0x1c/0x30 [ 36.091374] ? do_syscall_64+0x69/0xc0 [ 36.091374] ? exit_to_user_mode_prepare+0x37/0xb0 [ 36.091378] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091379] ? do_syscall_64+0x69/0xc0 [ 36.091379] ? do_syscall_64+0x69/0xc0 [ 36.091380] ? do_syscall_64+0x69/0xc0 [ 36.091381] ? do_syscall_64+0x69/0xc0 [ 36.091381] entry_SYSCALL_64_after_hwframe+0x61/0xcb [ 36.091384] RIP: 0033:0x7efdfe6d1aff [ 36.091390] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ 36.091391] RSP: 002b:00007efdfce8c460 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 36.091393] RAX: ffffffffffffffda RBX: 000000000000ae80 RCX: 00007efdfe6d1aff [ 36.091393] RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 000000000000000c [ 36.091394] RBP: 0000558f1609e220 R08: 0000558f13fb8190 R09: 00000000ffffffff [ 36.091394] R10: 0000558f16b5e950 R11: 0000000000000246 R12: 0000000000000000 [ 36.091394] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 [ 36.091396] [ 36.091397] Modules linked in: isofs nls_iso8859_1 kvm_intel joydev kvm input_leds serio_raw sch_fq_codel dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua ipmi_devintf ipmi_msghandler drm msr ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel virtio_net net_failover crypto_simd ahci xhci_pci cryptd psmouse virtio_blk libahci xhci_pci_renesas failover [ 36.123271] ---[ end trace db3c0ab5a48fabcc ]--- [ 36.123272] RIP: 
0010:xaddw_ax_dx+0x9/0x10 [kvm] [ 36.123319] Code: 00 0f bb d0 c3 cc cc cc cc 48 0f bb d0 c3 cc cc cc cc 0f 1f 80 00 00 00 00 0f c0 d0 c3 cc cc cc cc 66 0f c1 d0 c3 cc cc cc cc <0f> 1f 80 00 00 00 00 0f c1 d0 c3 cc cc cc cc 48 0f c1 d0 c3 cc cc [ 36.123320] RSP: 0018:ffffb1f541143c98 EFLAGS: 00000202 [ 36.123321] RAX: 0000000089abcdef RBX: 0000000000000001 RCX: 0000000000000000 [ 36.123321] RDX: 0000000076543210 RSI: ffffffffc073c6d0 RDI: 0000000000000200 [ 36.123322] RBP: ffffb1f541143ca0 R08: ffff9f1803350a70 R09: 0000000000000002 [ 36.123322] R10: ffff9f1803350a70 R11: 0000000000000000 R12: ffff9f1803350a70 [ 36.123323] R13: ffffffffc077fee0 R14: 0000000000000000 R15: 0000000000000000 [ 36.123323] FS: 00007efdfce8d640(0000) GS:ffff9f187dd80000(0000) knlGS:0000000000000000 [ 36.123324] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.123325] CR2: 0000000000000000 CR3: 0000000009b62002 CR4: 0000000000772ee0 [ 36.123327] PKRU: 55555554 [ 36.123328] Kernel panic - not syncing: Fatal exception in interrupt [ 36.123410] Kernel Offset: 0x1400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 36.135305] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: aa3d480315ba ("x86: Use return-thunk in asm code") Signed-off-by: Thadeu Lima de Souza Cascardo Co-developed-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Paolo Bonzini Reported-by: Linux Kernel Functional Testing Message-Id: <20220713171241.184026-1-cascardo@canonical.com> Tested-by: Jack Wang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index db96bf7d1122..0a15b0fec6d9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,8 +189,12 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) 
X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) +#define FASTOP_LENGTH (ENDBR_INSN_SIZE + 7 + RET_LENGTH) +#define FASTOP_SIZE (8 << ((FASTOP_LENGTH > 8) & 1) << ((FASTOP_LENGTH > 16) & 1)) +static_assert(FASTOP_LENGTH <= FASTOP_SIZE); struct opcode { u64 flags; @@ -442,8 +446,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) #define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) static_assert(SETCC_LENGTH <= SETCC_ALIGN); -- cgit From 1b870fa5573e260bc74d19f381ab0dd971a8d8e7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Jul 2022 07:27:31 -0400 Subject: kvm: stats: tell userspace which values are boolean Some of the statistics values exported by KVM are always only 0 or 1. It can be useful to export this fact to userspace so that it can track them specially (for example by polling the value every now and then to compute a % of time spent in a specific state). Therefore, add "boolean value" as a new "unit". While it is not exactly a unit, it walks and quacks like one. In particular, using the type would be wrong because boolean values could be instantaneous or peak values (e.g. "is the rmap allocated?") or even two-bucket histograms (e.g. "number of posted vs. non-posted interrupt injections"). 
Suggested-by: Amneesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26d0cac32f73..0c3e85e8fce9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_COUNTER(VCPU, preemption_reported), STATS_DESC_COUNTER(VCPU, preemption_other), - STATS_DESC_ICOUNTER(VCPU, guest_mode) + STATS_DESC_IBOOLEAN(VCPU, guest_mode) }; const struct kvm_stats_header kvm_vcpu_stats_header = { -- cgit From 8a414f943f8b5f94bbaafdec863d6f3dbef33f8a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 8 Jul 2022 14:51:47 +0200 Subject: KVM: x86: Fully initialize 'struct kvm_lapic_irq' in kvm_pv_kick_cpu_op() 'vector' and 'trig_mode' fields of 'struct kvm_lapic_irq' are left uninitialized in kvm_pv_kick_cpu_op(). While these fields are normally not needed for APIC_DM_REMRD, they're still referenced by __apic_accept_irq() for trace_kvm_apic_accept_irq(). Fully initialize the structure to avoid consuming random stack memory. 
Fixes: a183b638b61c ("KVM: x86: make apic_accept_irq tracepoint more generic") Reported-by: syzbot+d6caa905917d353f0d07@syzkaller.appspotmail.com Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Message-Id: <20220708125147.593975-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c3e85e8fce9..143e37298d8a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9143,15 +9143,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) { - struct kvm_lapic_irq lapic_irq; - - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } -- cgit From 79629181607e801c0b41b8790ac4ee2eb5d7bc3e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Jul 2022 07:34:55 -0400 Subject: KVM: emulate: do not adjust size of fastop and setcc subroutines Instead of doing complicated calculations to find the size of the subroutines (which are even more complicated because they need to be stringified into an asm statement), just hardcode to 16. It is less dense for a few combinations of IBT/SLS/retbleed, but it has the advantage of being really simple. 
Cc: stable@vger.kernel.org # 5.15.x: 84e7051c0bc1: x86/kvm: fix FASTOP_SIZE when return thunks are enabled Cc: stable@vger.kernel.org Suggested-by: Linus Torvalds Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0a15b0fec6d9..f8382abe22ff 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,13 +189,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) -#define FASTOP_LENGTH (ENDBR_INSN_SIZE + 7 + RET_LENGTH) -#define FASTOP_SIZE (8 << ((FASTOP_LENGTH > 8) & 1) << ((FASTOP_LENGTH > 16) & 1)) -static_assert(FASTOP_LENGTH <= FASTOP_SIZE); - struct opcode { u64 flags; u8 intercept; @@ -310,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. 
*/ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -446,9 +445,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) -#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) -static_assert(SETCC_LENGTH <= SETCC_ALIGN); +#define SETCC_ALIGN 16 #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ -- cgit