From 5a8781607e677eda60b20e0a4c91d2a5f12f9244 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Jan 2019 07:41:02 -0800 Subject: KVM: nVMX: Cache host_rsp on a per-VMCS basis Currently, host_rsp is cached on a per-vCPU basis, i.e. it's stored in struct vcpu_vmx. In non-nested usage the caching is for all intents and purposes 100% effective, e.g. only the first VMLAUNCH needs to synchronize VMCS.HOST_RSP since the call stack to vmx_vcpu_run() is identical each and every time. But when running a nested guest, KVM must invalidate the cache when switching the current VMCS as it can't guarantee the new VMCS has the same HOST_RSP as the previous VMCS. In other words, the cache loses almost all of its efficacy when running a nested VM. Move host_rsp to struct vmcs_host_state, which is per-VMCS, so that it is cached on a per-VMCS basis and restores its 100% hit rate when nested VMs are in play. Note that the host_rsp cache for vmcs02 essentially "breaks" when nested early checks are enabled as nested_vmx_check_vmentry_hw() will see a different RSP at the time of its VM-Enter. While it's possible to avoid even that VMCS.HOST_RSP synchronization, e.g. by employing a dedicated VM-Exit stack, there is little motivation for doing so as the overhead of two VMWRITEs (~55 cycles) is dwarfed by the overhead of the extra VMX transition (600+ cycles) and is a proverbial drop in the ocean relative to the total cost of a nested transtion (10s of thousands of cycles). Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e9d81cd8421f..12bb61e7aca6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6381,9 +6381,9 @@ static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* placeholder for guest RCX */ "push %%" _ASM_CX " \n\t" "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ - "cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" + "cmp %%" _ASM_SP ", (%%" _ASM_DI ") \n\t" "je 1f \n\t" - "mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" + "mov %%" _ASM_SP ", (%%" _ASM_DI ") \n\t" /* Avoid VMWRITE when Enlightened VMCS is in use */ "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" "jz 2f \n\t" @@ -6482,11 +6482,10 @@ static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) "xor %%edi, %%edi \n\t" "xor %%ebp, %%ebp \n\t" "pop %%" _ASM_BP " \n\t" - : ASM_CALL_CONSTRAINT, "=S"((int){0}) - : "c"(vmx), "S"(evmcs_rsp), + : ASM_CALL_CONSTRAINT, "=D"((int){0}), "=S"((int){0}) + : "c"(vmx), "D"(&vmx->loaded_vmcs->host_state.rsp), "S"(evmcs_rsp), [launched]"i"(offsetof(struct vcpu_vmx, __launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), - [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), [HOST_RSP]"i"(HOST_RSP), [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), @@ -6509,10 +6508,10 @@ static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) [wordsize]"i"(sizeof(ulong)) : "cc", "memory" #ifdef CONFIG_X86_64 - , "rax", "rbx", "rdx", "rdi" + , "rax", "rbx", "rdx" , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #else - , "eax", "ebx", "edx", "edi" + , "eax", "ebx", "edx" #endif ); } -- cgit