From 9a1dfeff4414112ce89652a5017538016ccd656c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Dec 2024 14:36:25 -0500 Subject: KVM: x86: clear vcpu->run->hypercall.ret before exiting for KVM_EXIT_HYPERCALL QEMU up to 9.2.0 is assuming that vcpu->run->hypercall.ret is 0 on exit and it never modifies it when processing KVM_EXIT_HYPERCALL. Make this explicit in the code, to avoid breakage when KVM starts modifying that field. This in principle is not a good idea... It would have been much better if KVM had set the field to -KVM_ENOSYS from the beginning, so that a dumb userspace that does nothing on KVM_EXIT_HYPERCALL would tell the guest it does not support KVM_HC_MAP_GPA_RANGE. However, breaking userspace is a Very Bad Thing, as everybody should know. Reported-by: Binbin Wu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 14 ++++++++++++++ arch/x86/kvm/x86.c | 7 +++++++ 2 files changed, 21 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 943bd074a5d3..9ffb0fb5aacd 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3634,6 +3634,13 @@ static int snp_begin_psc_msr(struct vcpu_svm *svm, u64 ghcb_msr) vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; + /* + * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2) + * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that + * it was always zero on KVM_EXIT_HYPERCALL. Since KVM is now overwriting + * vcpu->run->hypercall.ret, ensuring that it is zero to not break QEMU. + */ + vcpu->run->hypercall.ret = 0; vcpu->run->hypercall.args[0] = gpa; vcpu->run->hypercall.args[1] = 1; vcpu->run->hypercall.args[2] = (op == SNP_PAGE_STATE_PRIVATE) @@ -3797,6 +3804,13 @@ next_range: case VMGEXIT_PSC_OP_SHARED: vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; + /* + * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2) + * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that + * it was always zero on KVM_EXIT_HYPERCALL. Since KVM is now overwriting + * vcpu->run->hypercall.ret, ensuring that it is zero to not break QEMU. + */ + vcpu->run->hypercall.ret = 0; vcpu->run->hypercall.args[0] = gfn_to_gpa(gfn); vcpu->run->hypercall.args[1] = npages; vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b04092ec76a..615cba6fe466 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10052,6 +10052,13 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; + /* + * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2) + * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that + * it was always zero on KVM_EXIT_HYPERCALL. Since KVM is now overwriting + * vcpu->run->hypercall.ret, ensuring that it is zero to not break QEMU. + */ + vcpu->run->hypercall.ret = 0; vcpu->run->hypercall.args[0] = gpa; vcpu->run->hypercall.args[1] = npages; vcpu->run->hypercall.args[2] = attrs; -- cgit From c4c083d95105445ebf1cadfb7d05dd2630976241 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Wed, 27 Nov 2024 16:43:40 -0800 Subject: KVM: x86: Add a helper to check for user interception of KVM hypercalls Add and use user_exit_on_hypercall() to check if userspace wants to handle a KVM hypercall instead of open-coding the logic everywhere. No functional change intended. Signed-off-by: Binbin Wu Reviewed-by: Isaku Yamahata Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li [sean: squash into one patch, keep explicit KVM_HC_MAP_GPA_RANGE check] Signed-off-by: Sean Christopherson Reviewed-by: Tom Lendacky Message-ID: <20241128004344.4072099-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 4 ++-- arch/x86/kvm/x86.c | 2 +- arch/x86/kvm/x86.h | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9ffb0fb5aacd..5eb7a206f405 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3627,7 +3627,7 @@ static int snp_begin_psc_msr(struct vcpu_svm *svm, u64 ghcb_msr) return 1; /* resume guest */ } - if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) { + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { set_ghcb_msr(svm, GHCB_MSR_PSC_RESP_ERROR); return 1; /* resume guest */ } @@ -3717,7 +3717,7 @@ static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc) bool huge; u64 gfn; - if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) { + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 615cba6fe466..d485a53ac410 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10041,7 +10041,7 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, u64 gpa = a0, npages = a1, attrs = a2; ret = -KVM_ENOSYS; - if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) break; if (!PAGE_ALIGNED(gpa) || !npages || diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index ec623d23d13d..45dd53284dbd 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -612,4 +612,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, void *data, unsigned int count, int in); +static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr) +{ + return kvm->arch.hypercall_exit_enabled & BIT(hc_nr); +} + #endif -- cgit From 13b64ce1b635d3bbf7209c2fff7d2ac2c15d54d0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:43:41 -0800 Subject: KVM: x86: Move "emulate hypercall" function declarations to x86.h Move the declarations for the hypercall emulation APIs to x86.h. While the helpers are exported, they are intended to be consumed only by KVM vendor modules, i.e. don't need to be exposed to the kernel at-large. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Binbin Wu Reviewed-by: Kai Huang Reviewed-by: Tom Lendacky Reviewed-by: Xiaoyao Li Message-ID: <20241128004344.4072099-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ------ arch/x86/kvm/x86.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e159e44a6a1b..c1251b371421 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2181,12 +2181,6 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl); -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 45dd53284dbd..6db13b696468 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -617,4 +617,10 @@ static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr) return kvm->arch.hypercall_exit_enabled & BIT(hc_nr); } +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl); +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + #endif -- cgit From 05a518b49dd6f674cd0b1fe1eb6c8f9c3953b63d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:43:42 -0800 Subject: KVM: x86: Bump hypercall stat prior to fully completing hypercall Increment the "hypercalls" stat for KVM hypercalls as soon as KVM knows it will skip the guest instruction, i.e. once KVM is committed to emulating the hypercall. Waiting until completion adds no known value, and creates a discrepancy where the stat will be bumped if KVM exits to userspace as a result of trying to skip the instruction, but not if the hypercall itself exits. Handling the stat in common code will also avoid the need for another helper to dedup code when TDX comes along (TDX needs a separate completion path due to GPR usage differences). Signed-off-by: Sean Christopherson Reviewed-by: Binbin Wu Reviewed-by: Kai Huang Reviewed-by: Tom Lendacky Reviewed-by: Xiaoyao Li Message-ID: <20241128004344.4072099-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d485a53ac410..754d8166eabf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9979,7 +9979,6 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) if (!is_64_bit_hypercall(vcpu)) ret = (u32)ret; kvm_rax_write(vcpu, ret); - ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } @@ -9990,6 +9989,8 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, { unsigned long ret; + ++vcpu->stat.hypercalls; + trace_kvm_hypercall(nr, a0, a1, a2, a3); if (!op_64_bit) { @@ -10068,7 +10069,6 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); vcpu->arch.complete_userspace_io = complete_hypercall_exit; - /* stat is incremented on completion. */ return 0; } default: @@ -10077,7 +10077,6 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, } out: - ++vcpu->stat.hypercalls; return ret; } EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); -- cgit From d9eb86a6f43d74f08ee3b6eb99ad7eb2a7d7fce0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:43:43 -0800 Subject: KVM: x86: Always complete hypercall via function callback Finish "emulation" of KVM hypercalls by function callback, even when the hypercall is handled entirely within KVM, i.e. doesn't require an exit to userspace, and refactor __kvm_emulate_hypercall()'s return value to *only* communicate whether or not KVM should exit to userspace or resume the guest. (Ab)Use vcpu->run->hypercall.ret to propagate the return value to the callback, purely to avoid having to add a trampoline for every completion callback. Using the function return value for KVM's control flow eliminates the multiplexed return value, where '0' for KVM_HC_MAP_GPA_RANGE (and only that hypercall) means "exit to userspace". Note, the unnecessary extra indirect call and thus potential retpoline will be eliminated in the near future by converting the intermediate layer to a macro. Suggested-by: Binbin Wu Suggested-by: Kai Huang Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-ID: <20241128004344.4072099-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 28 +++++++++++----------------- arch/x86/kvm/x86.h | 10 ++++++---- 2 files changed, 17 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 754d8166eabf..53ef7fa7094b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9982,10 +9982,11 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl) +int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl, + int (*complete_hypercall)(struct kvm_vcpu *)) { unsigned long ret; @@ -10068,7 +10069,7 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE; WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); - vcpu->arch.complete_userspace_io = complete_hypercall_exit; + vcpu->arch.complete_userspace_io = complete_hypercall; return 0; } default: @@ -10077,13 +10078,14 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, } out: - return ret; + vcpu->run->hypercall.ret = ret; + return complete_hypercall(vcpu); } EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { - unsigned long nr, a0, a1, a2, a3, ret; + unsigned long nr, a0, a1, a2, a3; int op_64_bit; int cpl; @@ -10101,16 +10103,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) op_64_bit = is_64_bit_hypercall(vcpu); cpl = kvm_x86_call(get_cpl)(vcpu); - ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl); - if (nr == KVM_HC_MAP_GPA_RANGE && !ret) - /* MAP_GPA tosses the request to the user space. */ - return 0; - - if (!op_64_bit) - ret = (u32)ret; - kvm_rax_write(vcpu, ret); - - return kvm_skip_emulated_instruction(vcpu); + return __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, + complete_hypercall_exit); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 6db13b696468..28adc8ea04bf 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -617,10 +617,12 @@ static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr) return kvm->arch.hypercall_exit_enabled & BIT(hc_nr); } -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl); +int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl, + int (*complete_hypercall)(struct kvm_vcpu *)); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); #endif -- cgit From c50be1c9457d6c7486a7f592aa96ffbb8c3cde96 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Dec 2024 11:21:03 -0500 Subject: KVM: x86: Refactor __kvm_emulate_hypercall() into a macro Rework __kvm_emulate_hypercall() into a macro so that completion of hypercalls that don't exit to userspace use direct function calls to the completion helper, i.e. don't trigger a retpoline when RETPOLINE=y. Opportunistically take the names of the input registers, as opposed to taking the input values, to preemptively dedup more of the calling code (TDX needs to use different registers). Use the direct GPR accessors to read values to avoid the pointless marking of the registers as available (KVM requires GPRs to always be available). Signed-off-by: Sean Christopherson Reviewed-by: Binbin Wu Reviewed-by: Kai Huang Message-ID: <20241128004344.4072099-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 30 ++++++++++-------------------- arch/x86/kvm/x86.h | 25 ++++++++++++++++++++----- 2 files changed, 30 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 53ef7fa7094b..ad3fc35703a8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9982,11 +9982,11 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl, - int (*complete_hypercall)(struct kvm_vcpu *)) +int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl, + int (*complete_hypercall)(struct kvm_vcpu *)) { unsigned long ret; @@ -10079,31 +10079,21 @@ int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, out: vcpu->run->hypercall.ret = ret; - return complete_hypercall(vcpu); + return 1; } -EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); +EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { - unsigned long nr, a0, a1, a2, a3; - int op_64_bit; - int cpl; - if (kvm_xen_hypercall_enabled(vcpu->kvm)) return kvm_xen_hypercall(vcpu); if (kvm_hv_hypercall_enabled(vcpu)) return kvm_hv_hypercall(vcpu); - nr = kvm_rax_read(vcpu); - a0 = kvm_rbx_read(vcpu); - a1 = kvm_rcx_read(vcpu); - a2 = kvm_rdx_read(vcpu); - a3 = kvm_rsi_read(vcpu); - op_64_bit = is_64_bit_hypercall(vcpu); - cpl = kvm_x86_call(get_cpl)(vcpu); - - return __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, + return __kvm_emulate_hypercall(vcpu, rax, rbx, rcx, rdx, rsi, + is_64_bit_hypercall(vcpu), + kvm_x86_call(get_cpl)(vcpu), complete_hypercall_exit); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 28adc8ea04bf..b00ecbfef000 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -617,11 +617,26 @@ static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr) return kvm->arch.hypercall_exit_enabled & BIT(hc_nr); } -int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl, - int (*complete_hypercall)(struct kvm_vcpu *)); +int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl, + int (*complete_hypercall)(struct kvm_vcpu *)); + +#define __kvm_emulate_hypercall(_vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, complete_hypercall) \ +({ \ + int __ret; \ + \ + __ret = ____kvm_emulate_hypercall(_vcpu, \ + kvm_##nr##_read(_vcpu), kvm_##a0##_read(_vcpu), \ + kvm_##a1##_read(_vcpu), kvm_##a2##_read(_vcpu), \ + kvm_##a3##_read(_vcpu), op_64_bit, cpl, \ + complete_hypercall); \ + \ + if (__ret > 0) \ + __ret = complete_hypercall(_vcpu); \ + __ret; \ +}) int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -- cgit