Diffstat (limited to 'arch/x86/kvm/svm/sev.c')
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 205
1 file changed, 140 insertions, 65 deletions
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5a69b657dae9..2fbdebf79fbb 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -117,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
 	 */
 	down_write(&sev_deactivate_lock);
 
+	/* SNP firmware requires use of WBINVD for ASID recycling. */
 	wbinvd_on_all_cpus();
 
 	if (sev_snp_enabled)
@@ -446,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	init_args.probe = false;
 	ret = sev_platform_init(&init_args);
 	if (ret)
-		goto e_free;
+		goto e_free_asid;
+
+	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto e_free_asid;
+	}
 
 	/* This needs to happen after SEV/SNP firmware initialization. */
 	if (vm_type == KVM_X86_SNP_VM) {
@@ -464,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	return 0;
 
 e_free:
+	free_cpumask_var(sev->have_run_cpus);
+e_free_asid:
 	argp->error = init_args.error;
 	sev_asid_free(sev);
 	sev->asid = 0;
@@ -708,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 	}
 }
 
+static void sev_writeback_caches(struct kvm *kvm)
+{
+	/*
+	 * Note, the caller is responsible for ensuring correctness if the mask
+	 * can be modified, e.g. if a CPU could be doing VMRUN.
+	 */
+	if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
+		return;
+
+	/*
+	 * Ensure that all dirty guest tagged cache entries are written back
+	 * before releasing the pages back to the system for use. CLFLUSH will
+	 * not do this without SME_COHERENT, and flushing many cache lines
+	 * individually is slower than blasting WBINVD for large VMs, so issue
+	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
+	 * on CPUs that have done VMRUN, i.e. may have dirtied data using the
+	 * VM's ASID.
+	 *
+	 * For simplicity, never remove CPUs from the bitmap. Ideally, KVM
+	 * would clear the mask when flushing caches, but doing so requires
+	 * serializing multiple calls and having responding CPUs (to the IPI)
+	 * mark themselves as still running if they are running (or about to
+	 * run) a vCPU for the VM.
+	 */
+	wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
+}
+
 static unsigned long get_num_contig_pages(unsigned long idx,
 				struct page **inpages, unsigned long npages)
 {
@@ -1971,6 +2006,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
 	struct kvm_vcpu *src_vcpu;
 	unsigned long i;
 
+	if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+	    dst->created_vcpus != atomic_read(&dst->online_vcpus))
+		return -EBUSY;
+
 	if (!sev_es_guest(src))
 		return 0;
 
@@ -2033,6 +2072,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	if (ret)
 		goto out_source_vcpu;
 
+	/*
+	 * Allocate a new have_run_cpus for the destination, i.e. don't copy
+	 * the set of CPUs from the source. If a CPU was used to run a vCPU in
+	 * the source VM but is never used for the destination VM, then the CPU
+	 * can only have cached memory that was accessible to the source VM.
+	 */
+	if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto out_source_vcpu;
+	}
+
 	sev_migrate_from(kvm, source_kvm);
 	kvm_vm_dead(source_kvm);
 	cg_cleanup_sev = src_sev;
@@ -2131,11 +2181,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 		return -EINVAL;
 
 	/* Check for policy bits that must be set */
-	if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO) ||
-	    !(params.policy & SNP_POLICY_MASK_SMT))
-		return -EINVAL;
-
-	if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
+	if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO))
 		return -EINVAL;
 
 	sev->policy = params.policy;
@@ -2694,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
 		goto failed;
 	}
 
-	/*
-	 * Ensure that all guest tagged cache entries are flushed before
-	 * releasing the pages back to the system for use. CLFLUSH will
-	 * not do this, so issue a WBINVD.
-	 */
-	wbinvd_on_all_cpus();
+	sev_writeback_caches(kvm);
 
 	__unregister_enc_region_locked(kvm, region);
 
@@ -2741,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 		goto e_unlock;
 	}
 
+	mirror_sev = to_kvm_sev_info(kvm);
+	if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto e_unlock;
+	}
+
 	/*
	 * The mirror kvm holds an enc_context_owner ref so its asid can't
	 * disappear until we're done with it
	 */
 	source_sev = to_kvm_sev_info(source_kvm);
 	kvm_get_kvm(source_kvm);
-	mirror_sev = to_kvm_sev_info(kvm);
 	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
 
 	/* Set enc_context_owner and copy its encryption context over */
@@ -2809,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)
 
 	WARN_ON(!list_empty(&sev->mirror_vms));
 
-	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
+	free_cpumask_var(sev->have_run_cpus);
+
+	/*
+	 * If this is a mirror VM, remove it from the owner's list of mirrors
+	 * and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
+	 * Note, mirror VMs don't support registering encrypted regions.
+	 */
 	if (is_mirroring_enc_context(kvm)) {
 		struct kvm *owner_kvm = sev->enc_context_owner;
 
@@ -2820,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm)
 		return;
 	}
 
-	/*
-	 * Ensure that all guest tagged cache entries are flushed before
-	 * releasing the pages back to the system for use. CLFLUSH will
-	 * not do this, so issue a WBINVD.
-	 */
-	wbinvd_on_all_cpus();
 
 	/*
	 * if userspace was terminated before unregistering the memory regions
@@ -2871,6 +2917,33 @@ void __init sev_set_cpu_caps(void)
 	}
 }
 
+static bool is_sev_snp_initialized(void)
+{
+	struct sev_user_data_snp_status *status;
+	struct sev_data_snp_addr buf;
+	bool initialized = false;
+	int ret, error = 0;
+
+	status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO);
+	if (!status)
+		return false;
+
+	buf.address = __psp_pa(status);
+	ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error);
+	if (ret) {
+		pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n",
+		       ret, error, error);
+		goto out;
+	}
+
+	initialized = !!status->state;
+
+out:
+	snp_free_firmware_page(status);
+
+	return initialized;
+}
+
 void __init sev_hardware_setup(void)
 {
 	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
@@ -2975,6 +3048,14 @@ void __init sev_hardware_setup(void)
 	sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP);
 
 out:
+	if (sev_enabled) {
+		init_args.probe = true;
+		if (sev_platform_init(&init_args))
+			sev_supported = sev_es_supported = sev_snp_supported = false;
+		else if (sev_snp_supported)
+			sev_snp_supported = is_sev_snp_initialized();
+	}
+
 	if (boot_cpu_has(X86_FEATURE_SEV))
 		pr_info("SEV %s (ASIDs %u - %u)\n",
 			sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
@@ -3001,15 +3082,6 @@ out:
 	sev_supported_vmsa_features = 0;
 	if (sev_es_debug_swap_enabled)
 		sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
-
-	if (!sev_enabled)
-		return;
-
-	/*
-	 * Do both SNP and SEV initialization at KVM module load.
-	 */
-	init_args.probe = true;
-	sev_platform_init(&init_args);
 }
 
 void sev_hardware_unsetup(void)
@@ -3069,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
 
 	/*
	 * VM Page Flush takes a host virtual address and a guest ASID. Fall
-	 * back to WBINVD if this faults so as not to make any problems worse
-	 * by leaving stale encrypted data in the cache.
+	 * back to full writeback of caches if this faults so as not to make
+	 * any problems worse by leaving stale encrypted data in the cache.
	 */
 	if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
-		goto do_wbinvd;
+		goto do_sev_writeback_caches;
 
 	return;
 
-do_wbinvd:
-	wbinvd_on_all_cpus();
+do_sev_writeback_caches:
+	sev_writeback_caches(vcpu->kvm);
 }
 
 void sev_guest_memory_reclaimed(struct kvm *kvm)
 {
 	/*
	 * With SNP+gmem, private/encrypted memory is unreachable via the
-	 * hva-based mmu notifiers, so these events are only actually
-	 * pertaining to shared pages where there is no need to perform
-	 * the WBINVD to flush associated caches.
+	 * hva-based mmu notifiers, i.e. these events are explicitly scoped to
+	 * shared pages, where there's no need to flush caches.
	 */
 	if (!sev_guest(kvm) || sev_snp_guest(kvm))
 		return;
 
-	wbinvd_on_all_cpus();
+	sev_writeback_caches(kvm);
 }
 
 void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3424,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
 	if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
 		return -EINVAL;
 
+	/*
+	 * To optimize cache flushes when memory is reclaimed from an SEV VM,
+	 * track physical CPUs that enter the guest for SEV VMs and thus can
+	 * have encrypted, dirty data in the cache, and flush caches only for
+	 * CPUs that have entered the guest.
+	 */
+	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
+		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
+
 	/* Assign the asid allocated with this SEV guest */
 	svm->asid = asid;
 
@@ -3856,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
	 * From this point forward, the VMSA will always be a guest-mapped page
	 * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
	 * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
-	 * that involves cleanups like wbinvd_on_all_cpus() which would ideally
-	 * be handled during teardown rather than guest boot. Deferring that
-	 * also allows the existing logic for SEV-ES VMSAs to be re-used with
+	 * that involves cleanups like flushing caches, which would ideally be
+	 * handled during teardown rather than guest boot. Deferring that also
+	 * allows the existing logic for SEV-ES VMSAs to be re-used with
	 * minimal SNP-specific changes.
	 */
 	svm->sev_es.snp_has_guest_vmsa = true;
@@ -4360,16 +4440,17 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 				    count, in);
 }
 
-static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
+void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = &svm->vcpu;
+	/* Clear intercepts on MSRs that are context switched by hardware. */
+	svm_disable_intercept_for_msr(vcpu, MSR_AMD64_SEV_ES_GHCB, MSR_TYPE_RW);
+	svm_disable_intercept_for_msr(vcpu, MSR_EFER, MSR_TYPE_RW);
+	svm_disable_intercept_for_msr(vcpu, MSR_IA32_CR_PAT, MSR_TYPE_RW);
 
-	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
-		bool v_tsc_aux = guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
-				 guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
-
-		set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
-	}
+	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX))
+		svm_set_intercept_for_msr(vcpu, MSR_TSC_AUX, MSR_TYPE_RW,
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID));
 
 	/*
	 * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
@@ -4383,11 +4464,9 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
	 * XSAVES being exposed to the guest so that KVM can at least honor
	 * guest CPUID for RDMSR and WRMSR.
	 */
-	if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
-	    guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
-		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
-	else
-		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0);
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_XSS, MSR_TYPE_RW,
+				  !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) ||
+				  !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES));
 }
 
 void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
@@ -4399,16 +4478,12 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
 	best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
 	if (best)
 		vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
-
-	if (sev_es_guest(svm->vcpu.kvm))
-		sev_es_vcpu_after_set_cpuid(svm);
 }
 
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
 	struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
 	struct vmcb *vmcb = svm->vmcb01.ptr;
-	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
 
@@ -4419,8 +4494,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
	 * the VMSA will be NULL if this vCPU is the destination for intrahost
	 * migration, and will be copied later.
	 */
-	if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
-		svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+	if (!svm->sev_es.snp_has_guest_vmsa) {
+		if (svm->sev_es.vmsa)
+			svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+		else
+			svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+	}
 
 	if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
 		svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
@@ -4462,10 +4541,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 
 	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
 	svm_clr_intercept(svm, INTERCEPT_XSETBV);
-
-	/* Clear intercepts on selected MSRs */
-	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
 }
 
 void sev_init_vmcb(struct vcpu_svm *svm)
@@ -4854,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
 
 		/*
		 * SEV-ES avoids host/guest cache coherency issues through
-		 * WBINVD hooks issued via MMU notifiers during run-time, and
+		 * WBNOINVD hooks issued via MMU notifiers during run-time, and
		 * KVM's VM destroy path at shutdown. Those MMU notifier events
		 * don't cover gmem since there is no requirement to map pages
		 * to a HVA in order to use them for a running guest. While the
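
The pattern at the heart of this change is worth isolating: each physical CPU that runs a vCPU is recorded once in a per-VM cpumask, and cache writeback on memory reclaim is then limited to exactly those CPUs instead of being broadcast system-wide. Below is a minimal sketch of that pattern outside of KVM. The vm_ctx structure and function names are illustrative only; the cpumask calls are the stock kernel primitives the patch uses, and wbnoinvd_on_cpus_mask() is assumed to be the targeted-writeback helper this series depends on (defined in the x86 tree, not in this file).

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <asm/smp.h>

struct vm_ctx {
	/* CPUs that may hold dirty, guest-tagged cache lines. */
	cpumask_var_t have_run_cpus;
};

static int vm_ctx_init(struct vm_ctx *ctx)
{
	/* Zero-initialized, i.e. no CPU has entered the guest yet. */
	if (!zalloc_cpumask_var(&ctx->have_run_cpus, GFP_KERNEL_ACCOUNT))
		return -ENOMEM;
	return 0;
}

/* Entry path, analogous to pre_sev_run() above. */
static void vm_ctx_mark_cpu(struct vm_ctx *ctx, int cpu)
{
	/* Test before set to avoid dirtying the cache line on every entry. */
	if (!cpumask_test_cpu(cpu, ctx->have_run_cpus))
		cpumask_set_cpu(cpu, ctx->have_run_cpus);
}

/* Reclaim path, analogous to sev_writeback_caches() above. */
static void vm_ctx_writeback_caches(struct vm_ctx *ctx)
{
	if (cpumask_empty(ctx->have_run_cpus))
		return;

	/* CPUs are never cleared from the mask; see the patch's comment. */
	wbnoinvd_on_cpus_mask(ctx->have_run_cpus);
}

static void vm_ctx_destroy(struct vm_ctx *ctx)
{
	free_cpumask_var(ctx->have_run_cpus);
}

The trade-off the patch's comment calls out is visible here: the mask only grows, so a long-lived VM that has wandered across many CPUs flushes more than strictly necessary, in exchange for never having to serialize flushers against concurrent VMRUNs.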
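sev_writeback_caches() prefers WBNOINVD, which writes dirty lines back without invalidating them, over WBINVD. One plausible shape for the fallback the comment describes ("or WBINVD if the 'no invalidate' variant is unsupported"), patterned on the kernel's alternative() runtime patching, is sketched below; the real helper lives in the x86 tree, outside this diff.

#include <asm/alternative.h>
#include <asm/cpufeatures.h>

static __always_inline void wbnoinvd(void)
{
	/*
	 * Falling back to WBINVD is always correct: it writes back and
	 * additionally invalidates, so CPUs without X86_FEATURE_WBNOINVD
	 * merely pay the extra cost of refilling their caches afterwards.
	 */
	alternative("wbinvd", "wbnoinvd", X86_FEATURE_WBNOINVD);
}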
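The MSR-intercept rework also changes the shape of the call sites: instead of set_msr_interception()'s paired allow-read/allow-write flags, svm_set_intercept_for_msr() takes a single "should KVM intercept this MSR" predicate, so each condition reads in the positive. A sketch of the converted TSC_AUX logic, assuming the KVM-internal helpers used by the diff (svm_set_intercept_for_msr(), guest_cpu_cap_has()); the named local is illustrative:

static void recalc_tsc_aux_intercept(struct kvm_vcpu *vcpu)
{
	/* Intercept TSC_AUX unless the guest can use RDTSCP or RDPID. */
	bool intercept = !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
			 !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);

	svm_set_intercept_for_msr(vcpu, MSR_TSC_AUX, MSR_TYPE_RW, intercept);
}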