Diffstat (limited to 'arch/x86/kvm/svm/sev.c')
-rw-r--r--   arch/x86/kvm/svm/sev.c   417
1 file changed, 274 insertions(+), 143 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0bc708ee2788..2fbdebf79fbb 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -26,6 +26,7 @@
 #include <asm/fpu/xcr.h>
 #include <asm/fpu/xstate.h>
 #include <asm/debugreg.h>
+#include <asm/msr.h>
 #include <asm/sev.h>
 
 #include "mmu.h"
@@ -116,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
 	 */
 	down_write(&sev_deactivate_lock);
 
+	/* SNP firmware requires use of WBINVD for ASID recycling. */
 	wbinvd_on_all_cpus();
 
 	if (sev_snp_enabled)
@@ -445,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	init_args.probe = false;
 	ret = sev_platform_init(&init_args);
 	if (ret)
-		goto e_free;
+		goto e_free_asid;
+
+	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto e_free_asid;
+	}
 
 	/* This needs to happen after SEV/SNP firmware initialization. */
 	if (vm_type == KVM_X86_SNP_VM) {
@@ -463,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	return 0;
 
 e_free:
+	free_cpumask_var(sev->have_run_cpus);
+e_free_asid:
 	argp->error = init_args.error;
 	sev_asid_free(sev);
 	sev->asid = 0;
@@ -560,6 +569,8 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
 		return -EFAULT;
 
+	sev->policy = params.policy;
+
 	memset(&start, 0, sizeof(start));
 
 	dh_blob = NULL;
@@ -705,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 	}
 }
 
+static void sev_writeback_caches(struct kvm *kvm)
+{
+	/*
+	 * Note, the caller is responsible for ensuring correctness if the mask
+	 * can be modified, e.g. if a CPU could be doing VMRUN.
+	 */
+	if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
+		return;
+
+	/*
+	 * Ensure that all dirty guest tagged cache entries are written back
+	 * before releasing the pages back to the system for use.  CLFLUSH will
+	 * not do this without SME_COHERENT, and flushing many cache lines
+	 * individually is slower than blasting WBINVD for large VMs, so issue
+	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
+	 * on CPUs that have done VMRUN, i.e. may have dirtied data using the
+	 * VM's ASID.
+	 *
+	 * For simplicity, never remove CPUs from the bitmap.  Ideally, KVM
+	 * would clear the mask when flushing caches, but doing so requires
+	 * serializing multiple calls and having responding CPUs (to the IPI)
+	 * mark themselves as still running if they are running (or about to
+	 * run) a vCPU for the VM.
+	 */
+	wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
+}
+
 static unsigned long get_num_contig_pages(unsigned long idx,
 				struct page **inpages, unsigned long npages)
 {
@@ -1592,11 +1630,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 	/* allocate memory for header and transport buffer */
 	ret = -ENOMEM;
-	hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+	hdr = kzalloc(params.hdr_len, GFP_KERNEL);
 	if (!hdr)
 		goto e_unpin;
 
-	trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+	trans_data = kzalloc(params.trans_len, GFP_KERNEL);
 	if (!trans_data)
 		goto e_free_hdr;
 
@@ -1882,70 +1920,6 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 	atomic_set_release(&src_sev->migration_in_progress, 0);
 }
 
-/* vCPU mutex subclasses.  */
-enum sev_migration_role {
-	SEV_MIGRATION_SOURCE = 0,
-	SEV_MIGRATION_TARGET,
-	SEV_NR_MIGRATION_ROLES,
-};
-
-static int sev_lock_vcpus_for_migration(struct kvm *kvm,
-					enum sev_migration_role role)
-{
-	struct kvm_vcpu *vcpu;
-	unsigned long i, j;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (mutex_lock_killable_nested(&vcpu->mutex, role))
-			goto out_unlock;
-
-#ifdef CONFIG_PROVE_LOCKING
-		if (!i)
-			/*
-			 * Reset the role to one that avoids colliding with
-			 * the role used for the first vcpu mutex.
-			 */
-			role = SEV_NR_MIGRATION_ROLES;
-		else
-			mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
-#endif
-	}
-
-	return 0;
-
-out_unlock:
-
-	kvm_for_each_vcpu(j, vcpu, kvm) {
-		if (i == j)
-			break;
-
-#ifdef CONFIG_PROVE_LOCKING
-		if (j)
-			mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
-#endif
-
-		mutex_unlock(&vcpu->mutex);
-	}
-	return -EINTR;
-}
-
-static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
-{
-	struct kvm_vcpu *vcpu;
-	unsigned long i;
-	bool first = true;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (first)
-			first = false;
-		else
-			mutex_acquire(&vcpu->mutex.dep_map,
-				      SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
-
-		mutex_unlock(&vcpu->mutex);
-	}
-}
-
 static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
 {
 	struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm);
@@ -2032,6 +2006,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
 	struct kvm_vcpu *src_vcpu;
 	unsigned long i;
 
+	if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+	    dst->created_vcpus != atomic_read(&dst->online_vcpus))
+		return -EBUSY;
+
 	if (!sev_es_guest(src))
 		return 0;
 
@@ -2083,10 +2061,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 		charged = true;
 	}
 
-	ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
+	ret = kvm_lock_all_vcpus(kvm);
 	if (ret)
 		goto out_dst_cgroup;
-	ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
+	ret = kvm_lock_all_vcpus(source_kvm);
 	if (ret)
 		goto out_dst_vcpu;
 
@@ -2094,15 +2072,26 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	if (ret)
 		goto out_source_vcpu;
 
+	/*
+	 * Allocate a new have_run_cpus for the destination, i.e. don't copy
+	 * the set of CPUs from the source.  If a CPU was used to run a vCPU in
+	 * the source VM but is never used for the destination VM, then the CPU
+	 * can only have cached memory that was accessible to the source VM.
+	 */
+	if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto out_source_vcpu;
+	}
+
 	sev_migrate_from(kvm, source_kvm);
 	kvm_vm_dead(source_kvm);
 	cg_cleanup_sev = src_sev;
 	ret = 0;
 
 out_source_vcpu:
-	sev_unlock_vcpus_for_migration(source_kvm);
+	kvm_unlock_all_vcpus(source_kvm);
 out_dst_vcpu:
-	sev_unlock_vcpus_for_migration(kvm);
+	kvm_unlock_all_vcpus(kvm);
 out_dst_cgroup:
 	/* Operates on the source on success, on the destination on failure. */
 	if (charged)
@@ -2192,12 +2181,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 		return -EINVAL;
 
 	/* Check for policy bits that must be set */
-	if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO) ||
-	    !(params.policy & SNP_POLICY_MASK_SMT))
+	if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO))
 		return -EINVAL;
 
-	if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
-		return -EINVAL;
+	sev->policy = params.policy;
 
 	sev->snp_context = snp_context_create(kvm, argp);
 	if (!sev->snp_context)
@@ -2753,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
 		goto failed;
 	}
 
-	/*
-	 * Ensure that all guest tagged cache entries are flushed before
-	 * releasing the pages back to the system for use. CLFLUSH will
-	 * not do this, so issue a WBINVD.
-	 */
-	wbinvd_on_all_cpus();
+	sev_writeback_caches(kvm);
 
 	__unregister_enc_region_locked(kvm, region);
 
@@ -2800,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 		goto e_unlock;
 	}
 
+	mirror_sev = to_kvm_sev_info(kvm);
+	if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto e_unlock;
+	}
+
 	/*
 	 * The mirror kvm holds an enc_context_owner ref so its asid can't
	 * disappear until we're done with it
 	 */
 	source_sev = to_kvm_sev_info(source_kvm);
 	kvm_get_kvm(source_kvm);
-	mirror_sev = to_kvm_sev_info(kvm);
 	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
 
 	/* Set enc_context_owner and copy its encryption context over */
@@ -2868,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)
 
 	WARN_ON(!list_empty(&sev->mirror_vms));
 
-	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
+	free_cpumask_var(sev->have_run_cpus);
+
+	/*
+	 * If this is a mirror VM, remove it from the owner's list of a mirrors
+	 * and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
+	 * Note, mirror VMs don't support registering encrypted regions.
+	 */
 	if (is_mirroring_enc_context(kvm)) {
 		struct kvm *owner_kvm = sev->enc_context_owner;
 
@@ -2879,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm)
 		return;
 	}
 
-	/*
-	 * Ensure that all guest tagged cache entries are flushed before
-	 * releasing the pages back to the system for use. CLFLUSH will
-	 * not do this, so issue a WBINVD.
-	 */
-	wbinvd_on_all_cpus();
 
 	/*
 	 * if userspace was terminated before unregistering the memory regions
@@ -2930,9 +2917,37 @@ void __init sev_set_cpu_caps(void)
 	}
 }
 
+static bool is_sev_snp_initialized(void)
+{
+	struct sev_user_data_snp_status *status;
+	struct sev_data_snp_addr buf;
+	bool initialized = false;
+	int ret, error = 0;
+
+	status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO);
+	if (!status)
+		return false;
+
+	buf.address = __psp_pa(status);
+	ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error);
+	if (ret) {
+		pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n",
+		       ret, error, error);
+		goto out;
+	}
+
+	initialized = !!status->state;
+
+out:
+	snp_free_firmware_page(status);
+
+	return initialized;
+}
+
 void __init sev_hardware_setup(void)
 {
 	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
+	struct sev_platform_init_args init_args = {0};
 	bool sev_snp_supported = false;
 	bool sev_es_supported = false;
 	bool sev_supported = false;
@@ -3033,6 +3048,14 @@ void __init sev_hardware_setup(void)
 	sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP);
 
 out:
+	if (sev_enabled) {
+		init_args.probe = true;
+		if (sev_platform_init(&init_args))
+			sev_supported = sev_es_supported = sev_snp_supported = false;
+		else if (sev_snp_supported)
+			sev_snp_supported = is_sev_snp_initialized();
+	}
+
 	if (boot_cpu_has(X86_FEATURE_SEV))
 		pr_info("SEV %s (ASIDs %u - %u)\n",
 			sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
@@ -3074,6 +3097,8 @@ void sev_hardware_unsetup(void)
 
 	misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
 	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
+
+	sev_platform_shutdown();
 }
 
 int sev_cpu_init(struct svm_cpu_data *sd)
@@ -3116,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
 
 	/*
 	 * VM Page Flush takes a host virtual address and a guest ASID.  Fall
-	 * back to WBINVD if this faults so as not to make any problems worse
-	 * by leaving stale encrypted data in the cache.
+	 * back to full writeback of caches if this faults so as not to make
+	 * any problems worse by leaving stale encrypted data in the cache.
 	 */
-	if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
-		goto do_wbinvd;
+	if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+		goto do_sev_writeback_caches;
 
 	return;
 
-do_wbinvd:
-	wbinvd_on_all_cpus();
+do_sev_writeback_caches:
+	sev_writeback_caches(vcpu->kvm);
 }
 
 void sev_guest_memory_reclaimed(struct kvm *kvm)
 {
 	/*
 	 * With SNP+gmem, private/encrypted memory is unreachable via the
-	 * hva-based mmu notifiers, so these events are only actually
-	 * pertaining to shared pages where there is no need to perform
-	 * the WBINVD to flush associated caches.
+	 * hva-based mmu notifiers, i.e. these events are explicitly scoped to
+	 * shared pages, where there's no need to flush caches.
 	 */
 	if (!sev_guest(kvm) || sev_snp_guest(kvm))
 		return;
 
-	wbinvd_on_all_cpus();
+	sev_writeback_caches(kvm);
 }
 
 void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3173,9 +3197,14 @@ skip_vmsa_free:
 	kvfree(svm->sev_es.ghcb_sa);
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static void dump_ghcb(struct vcpu_svm *svm)
 {
-	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct vmcb_control_area *control = &svm->vmcb->control;
 	unsigned int nbits;
 
 	/* Re-use the dump_invalid_vmcb module parameter */
@@ -3184,18 +3213,24 @@ static void dump_ghcb(struct vcpu_svm *svm)
 		return;
 	}
 
-	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+	nbits = sizeof(svm->sev_es.valid_bitmap) * 8;
 
-	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+	/*
+	 * Print KVM's snapshot of the GHCB values that were (unsuccessfully)
+	 * used to handle the exit.  If the guest has since modified the GHCB
+	 * itself, dumping the raw GHCB won't help debug why KVM was unable to
+	 * handle the VMGEXIT that KVM observed.
+	 */
+	pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
-	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+	       kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
-	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+	       control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
-	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+	       control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
-	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
-	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+	       svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm));
+	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap);
 }
 
 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
@@ -3266,11 +3301,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
-static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
-{
-	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
-}
-
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3465,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
 	if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
 		return -EINVAL;
 
+	/*
+	 * To optimize cache flushes when memory is reclaimed from an SEV VM,
+	 * track physical CPUs that enter the guest for SEV VMs and thus can
+	 * have encrypted, dirty data in the cache, and flush caches only for
+	 * CPUs that have entered the guest.
+	 */
+	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
+		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
+
 	/* Assign the asid allocated with this SEV guest */
 	svm->asid = asid;
 
@@ -3897,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
 	 * From this point forward, the VMSA will always be a guest-mapped page
 	 * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
 	 * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
-	 * that involves cleanups like wbinvd_on_all_cpus() which would ideally
-	 * be handled during teardown rather than guest boot.  Deferring that
-	 * also allows the existing logic for SEV-ES VMSAs to be re-used with
+	 * that involves cleanups like flushing caches, which would ideally be
+	 * handled during teardown rather than guest boot.  Deferring that also
+	 * allows the existing logic for SEV-ES VMSAs to be re-used with
 	 * minimal SNP-specific changes.
 	 */
 	svm->sev_es.snp_has_guest_vmsa = true;
@@ -3988,10 +4027,8 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
 	 * Unless Creation is deferred until INIT, signal the vCPU to update
 	 * its state.
 	 */
-	if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT) {
-		kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
-		kvm_vcpu_kick(target_vcpu);
-	}
+	if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT)
+		kvm_make_request_and_kick(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
 
 	return 0;
 }
@@ -4403,16 +4440,17 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 				    count, in);
 }
 
-static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
+void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = &svm->vcpu;
+	/* Clear intercepts on MSRs that are context switched by hardware. */
+	svm_disable_intercept_for_msr(vcpu, MSR_AMD64_SEV_ES_GHCB, MSR_TYPE_RW);
+	svm_disable_intercept_for_msr(vcpu, MSR_EFER, MSR_TYPE_RW);
+	svm_disable_intercept_for_msr(vcpu, MSR_IA32_CR_PAT, MSR_TYPE_RW);
 
-	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
-		bool v_tsc_aux = guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
-				 guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
-
-		set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
-	}
+	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX))
+		svm_set_intercept_for_msr(vcpu, MSR_TSC_AUX, MSR_TYPE_RW,
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID));
 
 	/*
 	 * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
@@ -4426,11 +4464,9 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
 	 * XSAVES being exposed to the guest so that KVM can at least honor
 	 * guest CPUID for RDMSR and WRMSR.
 	 */
-	if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
-	    guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
-		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
-	else
-		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0);
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_XSS, MSR_TYPE_RW,
+				  !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) ||
+				  !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES));
 }
 
 void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
@@ -4442,15 +4478,12 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
 	best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
 	if (best)
 		vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
-
-	if (sev_es_guest(svm->vcpu.kvm))
-		sev_es_vcpu_after_set_cpuid(svm);
 }
 
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
+	struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
 	struct vmcb *vmcb = svm->vmcb01.ptr;
-	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
 
@@ -4461,8 +4494,16 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	 * the VMSA will be NULL if this vCPU is the destination for intrahost
 	 * migration, and will be copied later.
 	 */
-	if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
-		svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+	if (!svm->sev_es.snp_has_guest_vmsa) {
+		if (svm->sev_es.vmsa)
+			svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+		else
+			svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+	}
+
+	if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
+		svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
+							  VMCB_ALLOWED_SEV_FEATURES_VALID;
 
 	/* Can't intercept CR register access, HV can't modify CR registers */
 	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -4500,10 +4541,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 
 	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
 	svm_clr_intercept(svm, INTERCEPT_XSETBV);
-
-	/* Clear intercepts on selected MSRs */
-	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
 }
 
 void sev_init_vmcb(struct vcpu_svm *svm)
@@ -4892,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
 		/*
 		 * SEV-ES avoids host/guest cache coherency issues through
-		 * WBINVD hooks issued via MMU notifiers during run-time, and
+		 * WBNOINVD hooks issued via MMU notifiers during run-time, and
 		 * KVM's VM destroy path at shutdown. Those MMU notifier events
 		 * don't cover gmem since there is no requirement to map pages
 		 * to a HVA in order to use them for a running guest. While the
@@ -4924,3 +4961,97 @@ int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
 
 	return level;
 }
+
+struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_save_area *vmsa;
+	struct kvm_sev_info *sev;
+	int error = 0;
+	int ret;
+
+	if (!sev_es_guest(vcpu->kvm))
+		return NULL;
+
+	/*
+	 * If the VMSA has not yet been encrypted, return a pointer to the
+	 * current un-encrypted VMSA.
+	 */
+	if (!vcpu->arch.guest_state_protected)
+		return (struct vmcb_save_area *)svm->sev_es.vmsa;
+
+	sev = to_kvm_sev_info(vcpu->kvm);
+
+	/* Check if the SEV policy allows debugging */
+	if (sev_snp_guest(vcpu->kvm)) {
+		if (!(sev->policy & SNP_POLICY_DEBUG))
+			return NULL;
+	} else {
+		if (sev->policy & SEV_POLICY_NODBG)
+			return NULL;
+	}
+
+	if (sev_snp_guest(vcpu->kvm)) {
+		struct sev_data_snp_dbg dbg = {0};
+
+		vmsa = snp_alloc_firmware_page(__GFP_ZERO);
+		if (!vmsa)
+			return NULL;
+
+		dbg.gctx_paddr = __psp_pa(sev->snp_context);
+		dbg.src_addr = svm->vmcb->control.vmsa_pa;
+		dbg.dst_addr = __psp_pa(vmsa);
+
+		ret = sev_do_cmd(SEV_CMD_SNP_DBG_DECRYPT, &dbg, &error);
+
+		/*
+		 * Return the target page to a hypervisor page no matter what.
+		 * If this fails, the page can't be used, so leak it and don't
+		 * try to use it.
+		 */
+		if (snp_page_reclaim(vcpu->kvm, PHYS_PFN(__pa(vmsa))))
+			return NULL;
+
+		if (ret) {
+			pr_err("SEV: SNP_DBG_DECRYPT failed ret=%d, fw_error=%d (%#x)\n",
+			       ret, error, error);
+			free_page((unsigned long)vmsa);
+
+			return NULL;
+		}
+	} else {
+		struct sev_data_dbg dbg = {0};
+		struct page *vmsa_page;
+
+		vmsa_page = alloc_page(GFP_KERNEL);
+		if (!vmsa_page)
+			return NULL;
+
+		vmsa = page_address(vmsa_page);
+
+		dbg.handle = sev->handle;
+		dbg.src_addr = svm->vmcb->control.vmsa_pa;
+		dbg.dst_addr = __psp_pa(vmsa);
+		dbg.len = PAGE_SIZE;
+
+		ret = sev_do_cmd(SEV_CMD_DBG_DECRYPT, &dbg, &error);
+		if (ret) {
+			pr_err("SEV: SEV_CMD_DBG_DECRYPT failed ret=%d, fw_error=%d (0x%x)\n",
+			       ret, error, error);
+			__free_page(vmsa_page);
+
+			return NULL;
+		}
+	}
+
+	return vmsa;
+}
+
+void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa)
+{
+	/* If the VMSA has not yet been encrypted, nothing was allocated */
+	if (!vcpu->arch.guest_state_protected || !vmsa)
+		return;
+
+	free_page((unsigned long)vmsa);
+}
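For quick review, the have_run_cpus lifecycle introduced by this patch condenses to roughly the following sketch, assembled from the __sev_guest_init(), pre_sev_run(), sev_writeback_caches() and sev_vm_destroy() hunks above. Error handling, the intra-host migration path and the mirror-VM path are elided, so treat it as a reading aid rather than a drop-in snippet:

	/* VM init: allocate the per-VM tracking mask (the patch fails VM creation with -ENOMEM otherwise). */
	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT))
		return -ENOMEM;

	/* VMRUN path: record that this physical CPU may hold dirty cache lines tagged with the VM's ASID. */
	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);

	/* Memory reclaim / region unregister: write back caches only on CPUs that ever ran the VM. */
	if (!cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
		wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);

	/* VM destroy: free the mask. */
	free_cpumask_var(sev->have_run_cpus);

As the new comment in sev_writeback_caches() notes, CPUs are only ever added to the mask, never removed; that keeps pre_sev_run() free of any serialization against concurrent flushes, at the cost of a CPU staying in the mask for the VM's lifetime even if it never runs that VM again.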