From ccb2280ec2f9e805d70f57a3a1c5deff0d532cb3 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 25 Oct 2023 01:59:13 -0400 Subject: x86/kvm: Use separate percpu variable to track the enabling of asyncpf Refer to commit fd10cde9294f ("KVM paravirt: Add async PF initialization to PV guest") and commit 344d9588a9df ("KVM: Add PV MSR to enable asynchronous page faults delivery"). It turns out that at the time when asyncpf was introduced, the intent was to define the shared PV data 'struct kvm_vcpu_pv_apf_data' with a size of 64 bytes. However, it made a mistake and defined the size as 68 bytes, which failed to fit in a cache line and made the code inconsistent with the documentation. Below is the justification, quoted from Sean[*]: KVM (the host side) has *never* read kvm_vcpu_pv_apf_data.enabled, and the documentation clearly states that enabling is based solely on the bit in the synthetic MSR. So rather than update the documentation, fix the goof by removing the enabled field and using the separate percpu variable instead. KVM-as-a-host obviously doesn't enforce anything or consume the size, and changing the header will only affect guests that are rebuilt against the new header, so there's no chance of ABI breakage between KVM and its guests. The only possible breakage is if some other hypervisor is emulating KVM's async #PF (LOL) and relies on the guest to set kvm_vcpu_pv_apf_data.enabled. But (a) I highly doubt such a hypervisor exists, (b) that would arguably be a violation of KVM's "spec", and (c) the worst case scenario is that the guest would simply lose async #PF functionality. 
[*] https://lore.kernel.org/all/ZS7ERnnRqs8Fl0ZF@google.com/T/#u Suggested-by: Sean Christopherson Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20231025055914.1201792-2-xiaoyao.li@intel.com [sean: use true/false instead of 1/0 for booleans] Signed-off-by: Sean Christopherson --- arch/x86/include/uapi/asm/kvm_para.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6e64b27b2c1e..605899594ebb 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { __u32 token; __u8 pad[56]; - __u32 enabled; }; #define KVM_PV_EOI_BIT 0 -- cgit From f3c80061c0d35c60709088ccb019305796d3f6ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 9 Feb 2024 13:37:33 -0500 Subject: KVM: SEV: fix compat ABI for KVM_MEMORY_ENCRYPT_OP The data structs for KVM_MEMORY_ENCRYPT_OP have different sizes for 32- and 64-bit userspace, but they do not make any attempt to convert from one ABI to the other when 32-bit userspace is running on 64-bit kernels. This configuration never worked, and SEV is only for 64-bit kernels so we're not breaking ABI on 32-bit kernels. Fix this by adding the appropriate padding; no functional change intended for 64-bit userspace. 
Reviewed-by: Michael Roth Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ad29984d5e39..ef11aa4cab42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -694,6 +694,7 @@ enum sev_cmd_id { struct kvm_sev_cmd { __u32 id; + __u32 pad0; __u64 data; __u32 error; __u32 sev_fd; @@ -704,28 +705,35 @@ struct kvm_sev_launch_start { __u32 policy; __u64 dh_uaddr; __u32 dh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_launch_update_data { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_launch_secret { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_launch_measure { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_guest_status { @@ -738,33 +746,43 @@ struct kvm_sev_dbg { __u64 src_uaddr; __u64 dst_uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_attestation_report { __u8 mnonce[16]; __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_send_start { __u32 policy; + __u32 pad0; __u64 pdh_cert_uaddr; __u32 pdh_cert_len; + __u32 pad1; __u64 plat_certs_uaddr; __u32 plat_certs_len; + __u32 pad2; __u64 amd_certs_uaddr; __u32 amd_certs_len; + __u32 pad3; __u64 session_uaddr; __u32 session_len; + __u32 pad4; }; struct kvm_sev_send_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_receive_start { @@ -772,17 +790,22 @@ struct kvm_sev_receive_start { __u32 policy; __u64 pdh_uaddr; __u32 pdh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_receive_update_data { __u64 hdr_uaddr; __u32 
hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -- cgit From 6a537453000a916392fcac1acb96c1d9d1e05b74 Mon Sep 17 00:00:00 2001 From: Joan Bruguera Micó Date: Mon, 1 Apr 2024 20:55:30 +0200 Subject: x86/bpf: Fix IP for relocating call depth accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit: 59bec00ace28 ("x86/percpu: Introduce %rip-relative addressing to PER_CPU_VAR()") made PER_CPU_VAR() use rip-relative addressing, hence the INCREMENT_CALL_DEPTH macro and skl_call_thunk_template got rip-relative asm code inside of them. A follow-up commit: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template") changed x86_call_depth_emit_accounting() to use apply_relocation(), but mistakenly assumed that the code is being patched in-place (where the destination of the relocation matches the address of the code), using *pprog as the destination ip. This is not true for the call depth accounting emitted by the BPF JIT, so the calculated address was wrong; JIT-ed BPF progs on kernels with call depth tracking were broken and usually caused a page fault. Pass the destination IP when the BPF JIT emits call depth accounting. 
Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template") Signed-off-by: Joan Bruguera Micó Reviewed-by: Uros Bizjak Acked-by: Ingo Molnar Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: https://lore.kernel.org/r/20240401185821.224068-3-ubizjak@gmail.com Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/alternative.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index fcd20c6dc7f9..67b68d0d17d1 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) return dest; } static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, - void *func) + void *func, void *ip) { return 0; } -- cgit From 8cb4a9a82b21623dbb4b3051dd30d98356cf95bc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Apr 2024 17:16:14 -0700 Subject: x86/cpufeatures: Add CPUID_LNX_5 to track recently added Linux-defined word Add CPUID_LNX_5 to track cpufeatures' word 21, and add the appropriate compile-time assert in KVM to prevent direct lookups on the features in CPUID_LNX_5. KVM uses X86_FEATURE_* flags to manage guest CPUID, and so must translate features that are scattered by Linux from the Linux-defined bit to the hardware-defined bit, i.e. should never try to directly access scattered features in guest CPUID. 
Opportunistically add NR_CPUID_WORDS to enum cpuid_leafs, along with a compile-time assert in KVM's CPUID infrastructure to ensure that future additions update cpuid_leafs along with NCAPINTS. No functional change intended. Fixes: 7f274e609f3d ("x86/cpufeatures: Add new word for scattered features") Cc: Sandipan Das Signed-off-by: Sean Christopherson Acked-by: Dave Hansen Signed-off-by: Linus Torvalds --- arch/x86/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 42157ddcc09d..686e92d2663e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" -- cgit