diff options
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 62 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 127 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 315 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 22 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm_ops.h | 4 |
5 files changed, 349 insertions, 181 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 2545d0c61985..f8b7bc04b3e7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -238,6 +238,18 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size) kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1); } +static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl) +{ + /* Nested FLUSHBYASID is not supported yet. */ + switch(tlb_ctl) { + case TLB_CONTROL_DO_NOTHING: + case TLB_CONTROL_FLUSH_ALL_ASID: + return true; + default: + return false; + } +} + static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, struct vmcb_control_area *control) { @@ -257,6 +269,9 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, IOPM_SIZE))) return false; + if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl))) + return false; + return true; } @@ -538,22 +553,27 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) if (nested_npt_enabled(svm)) nested_svm_init_mmu_context(vcpu); - svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset = - vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; + vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset( + vcpu->arch.l1_tsc_offset, + svm->nested.ctl.tsc_offset, + svm->tsc_ratio_msr); + + svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset; + + if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) { + WARN_ON(!svm->tsc_scaling_enabled); + nested_svm_update_tsc_ratio_msr(vcpu); + } svm->vmcb->control.int_ctl = (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits); - svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; svm->vmcb->control.int_state = svm->nested.ctl.int_state; svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; - svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count; - svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh; - nested_svm_transition_tlb_flush(vcpu); /* Enter Guest-Mode */ @@ -579,7 +599,7 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to } int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, - struct vmcb *vmcb12) + struct vmcb *vmcb12, bool from_vmrun) { struct vcpu_svm *svm = to_svm(vcpu); int ret; @@ -609,13 +629,16 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, nested_vmcb02_prepare_save(svm, vmcb12); ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3, - nested_npt_enabled(svm), true); + nested_npt_enabled(svm), from_vmrun); if (ret) return ret; if (!npt_enabled) vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested; + if (!from_vmrun) + kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + svm_set_gif(svm, true); return 0; @@ -681,7 +704,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) svm->nested.nested_run_pending = 1; - if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12)) + if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true)) goto out_exit_err; if (nested_svm_vmrun_msrpm(svm)) @@ -808,11 +831,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; - vmcb12->control.pause_filter_count = - svm->vmcb->control.pause_filter_count; - vmcb12->control.pause_filter_thresh = - svm->vmcb->control.pause_filter_thresh; - nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); svm_switch_vmcb(svm, &svm->vmcb01); @@ -830,6 +848,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } + if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) { + WARN_ON(!svm->tsc_scaling_enabled); + vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; + svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); + } + svm->nested.ctl.nested_cr3 = 0; /* @@ -1217,6 +1241,16 @@ int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_CONTINUE; } +void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->arch.tsc_scaling_ratio = + kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio, + svm->tsc_ratio_msr); + svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); +} + static int svm_get_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, u32 user_data_size) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 75e0b21ad07c..1964b9a174be 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -17,10 +17,10 @@ #include <linux/misc_cgroup.h> #include <linux/processor.h> #include <linux/trace_events.h> -#include <asm/fpu/internal.h> #include <asm/pkru.h> #include <asm/trapnr.h> +#include <asm/fpu/xcr.h> #include "x86.h" #include "svm.h" @@ -595,43 +595,55 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) return 0; } -static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) +static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, + int *error) { - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_update_vmsa vmsa; + struct vcpu_svm *svm = to_svm(vcpu); + int ret; + + /* Perform some pre-encryption checks against the VMSA */ + ret = sev_es_sync_vmsa(svm); + if (ret) + return ret; + + /* + * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of + * the VMSA memory content (i.e it will write the same memory region + * with the guest's key), so invalidate it first. + */ + clflush_cache_range(svm->vmsa, PAGE_SIZE); + + vmsa.reserved = 0; + vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; + vmsa.address = __sme_pa(svm->vmsa); + vmsa.len = PAGE_SIZE; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); + if (ret) + return ret; + + vcpu->arch.guest_state_protected = true; + return 0; +} + +static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ struct kvm_vcpu *vcpu; int i, ret; if (!sev_es_guest(kvm)) return -ENOTTY; - vmsa.reserved = 0; - kvm_for_each_vcpu(i, vcpu, kvm) { - struct vcpu_svm *svm = to_svm(vcpu); - - /* Perform some pre-encryption checks against the VMSA */ - ret = sev_es_sync_vmsa(svm); + ret = mutex_lock_killable(&vcpu->mutex); if (ret) return ret; - /* - * The LAUNCH_UPDATE_VMSA command will perform in-place - * encryption of the VMSA memory content (i.e it will write - * the same memory region with the guest's key), so invalidate - * it first. - */ - clflush_cache_range(svm->vmsa, PAGE_SIZE); + ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error); - vmsa.handle = sev->handle; - vmsa.address = __sme_pa(svm->vmsa); - vmsa.len = PAGE_SIZE; - ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, - &argp->error); + mutex_unlock(&vcpu->mutex); if (ret) return ret; - - svm->vcpu.arch.guest_state_protected = true; } return 0; @@ -1397,8 +1409,10 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start.handle, error); - if (ret) + if (ret) { + sev_decommission(start.handle); goto e_free_session; + } params.handle = start.handle; if (copy_to_user((void __user *)(uintptr_t)argp->data, @@ -1464,12 +1478,19 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Pin guest memory */ guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, - PAGE_SIZE, &n, 0); + PAGE_SIZE, &n, 1); if (IS_ERR(guest_page)) { ret = PTR_ERR(guest_page); goto e_free_trans; } + /* + * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP + * encrypts the written data with the guest's key, and the cache may + * contain dirty, unencrypted data. + */ + sev_clflush_pages(guest_page, n); + /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; data.guest_address |= sev_me_mask; @@ -1501,6 +1522,20 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); } +static bool cmd_allowed_from_miror(u32 cmd_id) +{ + /* + * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES + * active mirror VMs. Also allow the debugging and status commands. + */ + if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA || + cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT || + cmd_id == KVM_SEV_DBG_ENCRYPT) + return true; + + return false; +} + int svm_mem_enc_op(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -1517,8 +1552,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) mutex_lock(&kvm->lock); - /* enc_context_owner handles all memory enc operations */ - if (is_mirroring_enc_context(kvm)) { + /* Only the enc_context_owner handles some memory enc operations. */ + if (is_mirroring_enc_context(kvm) && + !cmd_allowed_from_miror(sev_cmd.id)) { r = -EINVAL; goto out; } @@ -1715,8 +1751,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) { struct file *source_kvm_file; struct kvm *source_kvm; - struct kvm_sev_info *mirror_sev; - unsigned int asid; + struct kvm_sev_info source_sev, *mirror_sev; int ret; source_kvm_file = fget(source_fd); @@ -1739,7 +1774,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) goto e_source_unlock; } - asid = to_kvm_svm(source_kvm)->sev_info.asid; + memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info, + sizeof(source_sev)); /* * The mirror kvm holds an enc_context_owner ref so its asid can't @@ -1759,8 +1795,16 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) /* Set enc_context_owner and copy its encryption context over */ mirror_sev = &to_kvm_svm(kvm)->sev_info; mirror_sev->enc_context_owner = source_kvm; - mirror_sev->asid = asid; mirror_sev->active = true; + mirror_sev->asid = source_sev.asid; + mirror_sev->fd = source_sev.fd; + mirror_sev->es_active = source_sev.es_active; + mirror_sev->handle = source_sev.handle; + /* + * Do not copy ap_jump_table. Since the mirror does not share the same + * KVM contexts as the original, and they may have different + * memory-views. + */ mutex_unlock(&kvm->lock); return 0; @@ -2547,11 +2591,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { - if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + int count; + int bytes; + + if (svm->vmcb->control.exit_info_2 > INT_MAX) + return -EINVAL; + + count = svm->vmcb->control.exit_info_2; + if (unlikely(check_mul_overflow(count, size, &bytes))) + return -EINVAL; + + if (!setup_vmgexit_scratch(svm, in, bytes)) return -EINVAL; - return kvm_sev_es_string_io(&svm->vcpu, size, port, - svm->ghcb_sa, svm->ghcb_sa_len, in); + return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); } void sev_es_init_vmcb(struct vcpu_svm *svm) @@ -2599,11 +2652,11 @@ void sev_es_init_vmcb(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); } -void sev_es_create_vcpu(struct vcpu_svm *svm) +void sev_es_vcpu_reset(struct vcpu_svm *svm) { /* - * Set the GHCB MSR value as per the GHCB specification when creating - * a vCPU for an SEV-ES guest. + * Set the GHCB MSR value as per the GHCB specification when emulating + * vCPU RESET for an SEV-ES guest. */ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, GHCB_VERSION_MIN, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 05e8d4d27969..b36ca4e476c2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -25,6 +25,7 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/rwsem.h> +#include <linux/cc_platform.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -36,6 +37,7 @@ #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> #include <asm/traps.h> +#include <asm/fpu/api.h> #include <asm/virtext.h> #include "trace.h" @@ -186,6 +188,13 @@ module_param(vls, int, 0444); static int vgif = true; module_param(vgif, int, 0444); +/* enable/disable LBR virtualization */ +static int lbrv = true; +module_param(lbrv, int, 0444); + +static int tsc_scaling = true; +module_param(tsc_scaling, int, 0444); + /* * enable / disable AVIC. Because the defaults differ for APICv * support between VMX and SVM we cannot use module_param_named. @@ -455,7 +464,7 @@ static int has_svm(void) return 0; } - if (sev_active()) { + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { pr_info("KVM is unsupported when running as an SEV guest\n"); return 0; } @@ -466,7 +475,7 @@ static int has_svm(void) static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ - if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + if (tsc_scaling) wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); cpu_svm_disable(); @@ -509,6 +518,10 @@ static int svm_hardware_enable(void) wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + /* + * Set the default value, even if we don't use TSC scaling + * to avoid having stale value in the msr + */ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); } @@ -929,6 +942,9 @@ static __init void svm_set_cpu_caps(void) if (npt_enabled) kvm_cpu_cap_set(X86_FEATURE_NPT); + if (tsc_scaling) + kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR); + /* Nested VM can receive #VMEXIT instead of triggering #GP */ kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK); } @@ -976,10 +992,15 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); - if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; - kvm_tsc_scaling_ratio_frac_bits = 32; + if (tsc_scaling) { + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + tsc_scaling = false; + } else { + pr_info("TSC scaling supported\n"); + kvm_has_tsc_control = true; + kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; + kvm_tsc_scaling_ratio_frac_bits = 32; + } } tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); @@ -1059,6 +1080,13 @@ static __init int svm_hardware_setup(void) pr_info("Virtual GIF supported\n"); } + if (lbrv) { + if (!boot_cpu_has(X86_FEATURE_LBRV)) + lbrv = false; + else + pr_info("LBR virtualization supported\n"); + } + svm_set_cpu_caps(); /* @@ -1109,7 +1137,9 @@ static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu) static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) { - return kvm_default_tsc_scaling_ratio; + struct vcpu_svm *svm = to_svm(vcpu); + + return svm->tsc_ratio_msr; } static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -1121,7 +1151,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) +void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) { wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); } @@ -1150,6 +1180,38 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, } } +static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (guest_cpuid_is_intel(vcpu)) { + /* + * We must intercept SYSENTER_EIP and SYSENTER_ESP + * accesses because the processor only stores 32 bits. + * For the same reason we cannot use virtual VMLOAD/VMSAVE. + */ + svm_set_intercept(svm, INTERCEPT_VMLOAD); + svm_set_intercept(svm, INTERCEPT_VMSAVE); + svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0); + } else { + /* + * If hardware supports Virtual VMLOAD VMSAVE then enable it + * in VMCB and clear intercepts to avoid #VMEXIT. + */ + if (vls) { + svm_clr_intercept(svm, INTERCEPT_VMLOAD); + svm_clr_intercept(svm, INTERCEPT_VMSAVE); + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + } + /* No need to intercept these MSRs */ + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1); + } +} + static void init_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1296,11 +1358,25 @@ static void init_vmcb(struct kvm_vcpu *vcpu) } svm_hv_init_vmcb(svm->vmcb); + init_vmcb_after_set_cpuid(vcpu); vmcb_mark_all_dirty(svm->vmcb); enable_gif(svm); +} + +static void __svm_vcpu_reset(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + svm_vcpu_init_msrpm(vcpu, svm->msrpm); + + svm_init_osvw(vcpu); + vcpu->arch.microcode_version = 0x01000065; + svm->tsc_ratio_msr = kvm_default_tsc_scaling_ratio; + + if (sev_es_guest(vcpu->kvm)) + sev_es_vcpu_reset(svm); } static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -1311,6 +1387,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) svm->virt_spec_ctrl = 0; init_vmcb(vcpu); + + if (!init_event) + __svm_vcpu_reset(vcpu); } void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb) @@ -1346,10 +1425,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) /* * SEV-ES guests maintain an encrypted version of their FPU * state which is restored and saved on VMRUN and VMEXIT. - * Free the fpu structure to prevent KVM from attempting to - * access the FPU state. + * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't + * do xsave/xrstor on it. */ - kvm_free_guest_fpu(vcpu); + fpstate_set_confidential(&vcpu->arch.guest_fpu); } err = avic_init_vcpu(svm); @@ -1370,24 +1449,13 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); + svm_switch_vmcb(svm, &svm->vmcb01); if (vmsa_page) svm->vmsa = page_address(vmsa_page); svm->guest_state_loaded = false; - svm_switch_vmcb(svm, &svm->vmcb01); - init_vmcb(vcpu); - - svm_vcpu_init_msrpm(vcpu, svm->msrpm); - - svm_init_osvw(vcpu); - vcpu->arch.microcode_version = 0x01000065; - - if (sev_es_guest(vcpu->kvm)) - /* Perform SEV-ES specific VMCB creation updates */ - sev_es_create_vcpu(svm); - return 0; error_free_vmsa_page: @@ -1447,7 +1515,7 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) vmsave(__sme_page_pa(sd->save_area)); } - if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + if (tsc_scaling) { u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { __this_cpu_write(current_tsc_ratio, tsc_ratio); @@ -1566,6 +1634,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm) svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & V_IRQ_INJECTION_BITS_MASK; + + svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; } vmcb_mark_dirty(svm->vmcb, VMCB_INTR); @@ -2222,6 +2292,10 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (error_code) goto reinject; + /* All SVM instructions expect page aligned RAX */ + if (svm->vmcb->save.rax & ~PAGE_MASK) + goto reinject; + /* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject; @@ -2651,6 +2725,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct vcpu_svm *svm = to_svm(vcpu); switch (msr_info->index) { + case MSR_AMD64_TSC_RATIO: + if (!msr_info->host_initiated && !svm->tsc_scaling_enabled) + return 1; + msr_info->data = svm->tsc_ratio_msr; + break; case MSR_STAR: msr_info->data = svm->vmcb01.ptr->save.star; break; @@ -2800,6 +2879,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u32 ecx = msr->index; u64 data = msr->data; switch (ecx) { + case MSR_AMD64_TSC_RATIO: + if (!msr->host_initiated && !svm->tsc_scaling_enabled) + return 1; + + if (data & TSC_RATIO_RSVD) + return 1; + + svm->tsc_ratio_msr = data; + + if (svm->tsc_scaling_enabled && is_guest_mode(vcpu)) + nested_svm_update_tsc_ratio_msr(vcpu); + + break; case MSR_IA32_CR_PAT: if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) return 1; @@ -2912,7 +3004,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->tsc_aux = data; break; case MSR_IA32_DEBUGCTLMSR: - if (!boot_cpu_has(X86_FEATURE_LBRV)) { + if (!lbrv) { vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", __func__, data); break; @@ -3272,11 +3364,13 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) return svm_exit_handlers[exit_code](vcpu); } -static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, +static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + *reason = control->exit_code; *info1 = control->exit_info_1; *info2 = control->exit_info_2; *intr_info = control->exit_int_info; @@ -3293,7 +3387,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); + trace_kvm_exit(vcpu, KVM_ISA_SVM); /* SEV-ES guests must use the CR write traps to track CR registers. */ if (!sev_es_guest(vcpu->kvm)) { @@ -3306,7 +3400,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (is_guest_mode(vcpu)) { int vmexit; - trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM); + trace_kvm_nested_vmexit(vcpu, KVM_ISA_SVM); vmexit = nested_svm_exit_special(svm); @@ -3774,8 +3868,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) pre_svm_run(vcpu); - WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu)); - sync_lapic_to_cr8(vcpu); if (unlikely(svm->asid != svm->vmcb->control.asid)) { @@ -3995,6 +4087,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && guest_cpuid_has(vcpu, X86_FEATURE_NRIPS); + svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR); + svm_recalc_instruction_intercepts(vcpu, svm); /* For sev guests, the memory encryption bit is not reserved in CR3. */ @@ -4021,33 +4115,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_NESTED); } - - if (guest_cpuid_is_intel(vcpu)) { - /* - * We must intercept SYSENTER_EIP and SYSENTER_ESP - * accesses because the processor only stores 32 bits. - * For the same reason we cannot use virtual VMLOAD/VMSAVE. - */ - svm_set_intercept(svm, INTERCEPT_VMLOAD); - svm_set_intercept(svm, INTERCEPT_VMSAVE); - svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; - - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0); - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0); - } else { - /* - * If hardware supports Virtual VMLOAD VMSAVE then enable it - * in VMCB and clear intercepts to avoid #VMEXIT. - */ - if (vls) { - svm_clr_intercept(svm, INTERCEPT_VMLOAD); - svm_clr_intercept(svm, INTERCEPT_VMSAVE); - svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; - } - /* No need to intercept these MSRs */ - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1); - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1); - } + init_vmcb_after_set_cpuid(vcpu); } static bool svm_has_wbinvd_exit(void) @@ -4285,43 +4353,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) struct kvm_host_map map_save; int ret; - if (is_guest_mode(vcpu)) { - /* FED8h - SVM Guest */ - put_smstate(u64, smstate, 0x7ed8, 1); - /* FEE0h - SVM Guest VMCB Physical Address */ - put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); + if (!is_guest_mode(vcpu)) + return 0; - svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; - svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; - svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; + /* FED8h - SVM Guest */ + put_smstate(u64, smstate, 0x7ed8, 1); + /* FEE0h - SVM Guest VMCB Physical Address */ + put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); - ret = nested_svm_vmexit(svm); - if (ret) - return ret; + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; + svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; + svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; - /* - * KVM uses VMCB01 to store L1 host state while L2 runs but - * VMCB01 is going to be used during SMM and thus the state will - * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save - * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the - * format of the area is identical to guest save area offsetted - * by 0x400 (matches the offset of 'struct vmcb_save_area' - * within 'struct vmcb'). Note: HSAVE area may also be used by - * L1 hypervisor to save additional host context (e.g. KVM does - * that, see svm_prepare_guest_switch()) which must be - * preserved. - */ - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), - &map_save) == -EINVAL) - return 1; + ret = nested_svm_vmexit(svm); + if (ret) + return ret; - BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); + /* + * KVM uses VMCB01 to store L1 host state while L2 runs but + * VMCB01 is going to be used during SMM and thus the state will + * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save + * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the + * format of the area is identical to guest save area offsetted + * by 0x400 (matches the offset of 'struct vmcb_save_area' + * within 'struct vmcb'). Note: HSAVE area may also be used by + * L1 hypervisor to save additional host context (e.g. KVM does + * that, see svm_prepare_guest_switch()) which must be + * preserved. + */ + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), + &map_save) == -EINVAL) + return 1; - svm_copy_vmrun_state(map_save.hva + 0x400, - &svm->vmcb01.ptr->save); + BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); - kvm_vcpu_unmap(vcpu, &map_save, true); - } + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); + + kvm_vcpu_unmap(vcpu, &map_save, true); return 0; } @@ -4329,50 +4398,54 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map, map_save; - int ret = 0; + u64 saved_efer, vmcb12_gpa; + struct vmcb *vmcb12; + int ret; - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { - u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); - u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8); - u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); - struct vmcb *vmcb12; + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return 0; - if (guest) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) - return 1; + /* Non-zero if SMI arrived while vCPU was in guest mode. */ + if (!GET_SMSTATE(u64, smstate, 0x7ed8)) + return 0; - if (!(saved_efer & EFER_SVME)) - return 1; + if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + return 1; - if (kvm_vcpu_map(vcpu, - gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) - return 1; + saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); + if (!(saved_efer & EFER_SVME)) + return 1; - if (svm_allocate_nested(svm)) - return 1; + vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) + return 1; - vmcb12 = map.hva; + ret = 1; + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL) + goto unmap_map; - nested_load_control_from_vmcb12(svm, &vmcb12->control); + if (svm_allocate_nested(svm)) + goto unmap_save; - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12); - kvm_vcpu_unmap(vcpu, &map, true); + /* + * Restore L1 host state from L1 HSAVE area as VMCB01 was + * used during SMM (see svm_enter_smm()) + */ - /* - * Restore L1 host state from L1 HSAVE area as VMCB01 was - * used during SMM (see svm_enter_smm()) - */ - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), - &map_save) == -EINVAL) - return 1; + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400); - svm_copy_vmrun_state(&svm->vmcb01.ptr->save, - map_save.hva + 0x400); + /* + * Enter the nested guest now + */ - kvm_vcpu_unmap(vcpu, &map_save, true); - } - } + vmcb12 = map.hva; + nested_load_control_from_vmcb12(svm, &vmcb12->control); + ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); +unmap_save: + kvm_vcpu_unmap(vcpu, &map_save, true); +unmap_map: + kvm_vcpu_unmap(vcpu, &map, true); return ret; } @@ -4509,6 +4582,8 @@ static int svm_vm_init(struct kvm *kvm) } static struct kvm_x86_ops svm_x86_ops __initdata = { + .name = "kvm_amd", + .hardware_unsetup = svm_hardware_teardown, .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 524d943f3efc..5e9510d4574e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -140,6 +140,8 @@ struct vcpu_svm { u64 next_rip; u64 spec_ctrl; + + u64 tsc_ratio_msr; /* * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be * translated into the appropriate L2_CFG bits on the host to @@ -160,7 +162,8 @@ struct vcpu_svm { unsigned long int3_rip; /* cached guest cpuid flags for faster access */ - bool nrips_enabled : 1; + bool nrips_enabled : 1; + bool tsc_scaling_enabled : 1; u32 ldr_reg; u32 dfr_reg; @@ -191,7 +194,7 @@ struct vcpu_svm { /* SEV-ES scratch area support */ void *ghcb_sa; - u64 ghcb_sa_len; + u32 ghcb_sa_len; bool ghcb_sa_sync; bool ghcb_sa_free; @@ -218,12 +221,12 @@ DECLARE_PER_CPU(struct svm_cpu_data *, svm_data); void recalc_intercepts(struct vcpu_svm *svm); -static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) +static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) { return container_of(kvm, struct kvm_svm, kvm); } -static inline bool sev_guest(struct kvm *kvm) +static __always_inline bool sev_guest(struct kvm *kvm) { #ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -234,7 +237,7 @@ static inline bool sev_guest(struct kvm *kvm) #endif } -static inline bool sev_es_guest(struct kvm *kvm) +static __always_inline bool sev_es_guest(struct kvm *kvm) { #ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -271,7 +274,7 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit) return !test_bit(bit, (unsigned long *)&vmcb->control.clean); } -static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) +static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_svm, vcpu); } @@ -459,7 +462,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI); } -int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12); +int enter_svm_guest_mode(struct kvm_vcpu *vcpu, + u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun); void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); @@ -482,6 +486,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); +void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); +void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier); void nested_load_control_from_vmcb12(struct vcpu_svm *svm, struct vmcb_control_area *control); void nested_sync_control_from_vmcb02(struct vcpu_svm *svm); @@ -561,7 +567,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); void sev_es_init_vmcb(struct vcpu_svm *svm); -void sev_es_create_vcpu(struct vcpu_svm *svm); +void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); void sev_es_unmap_ghcb(struct vcpu_svm *svm); diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 22e2b019de37..9430d6437c9f 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -56,12 +56,12 @@ static inline void invlpga(unsigned long addr, u32 asid) * VMSAVE, VMLOAD, etc... is still controlled by the effective address size, * hence 'unsigned long' instead of 'hpa_t'. */ -static inline void vmsave(unsigned long pa) +static __always_inline void vmsave(unsigned long pa) { svm_asm1(vmsave, "a" (pa), "memory"); } -static inline void vmload(unsigned long pa) +static __always_inline void vmload(unsigned long pa) { svm_asm1(vmload, "a" (pa), "memory"); } |