summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm/sev.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm/sev.c')
-rw-r--r--arch/x86/kvm/svm/sev.c1093
1 files changed, 659 insertions, 434 deletions
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 943bd074a5d3..f59c65abe3cf 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -26,6 +26,7 @@
#include <asm/fpu/xcr.h>
#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
+#include <asm/msr.h>
#include <asm/sev.h>
#include "mmu.h"
@@ -36,7 +37,6 @@
#include "trace.h"
#define GHCB_VERSION_MAX 2ULL
-#define GHCB_VERSION_DEFAULT 2ULL
#define GHCB_VERSION_MIN 1ULL
#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
@@ -58,24 +58,31 @@ static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
static u64 sev_supported_vmsa_features;
+static unsigned int nr_ciphertext_hiding_asids;
+module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444);
+
#define AP_RESET_HOLD_NONE 0
#define AP_RESET_HOLD_NAE_EVENT 1
#define AP_RESET_HOLD_MSR_PROTO 2
-/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */
-#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0)
-#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8)
-#define SNP_POLICY_MASK_SMT BIT_ULL(16)
-#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17)
-#define SNP_POLICY_MASK_DEBUG BIT_ULL(19)
-#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20)
-
-#define SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \
- SNP_POLICY_MASK_API_MAJOR | \
- SNP_POLICY_MASK_SMT | \
- SNP_POLICY_MASK_RSVD_MBO | \
- SNP_POLICY_MASK_DEBUG | \
- SNP_POLICY_MASK_SINGLE_SOCKET)
+/*
+ * SEV-SNP policy bits that can be supported by KVM. These include policy bits
+ * that have implementation support within KVM or policy bits that do not
+ * require implementation support within KVM to enforce the policy.
+ */
+#define KVM_SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \
+ SNP_POLICY_MASK_API_MAJOR | \
+ SNP_POLICY_MASK_SMT | \
+ SNP_POLICY_MASK_RSVD_MBO | \
+ SNP_POLICY_MASK_DEBUG | \
+ SNP_POLICY_MASK_SINGLE_SOCKET | \
+ SNP_POLICY_MASK_CXL_ALLOW | \
+ SNP_POLICY_MASK_MEM_AES_256_XTS | \
+ SNP_POLICY_MASK_RAPL_DIS | \
+ SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM | \
+ SNP_POLICY_MASK_PAGE_SWAP_DISABLE)
+
+static u64 snp_supported_policy_bits __ro_after_init;
#define INITIAL_VMSA_GPA 0xFFFFFFFFF000
@@ -84,6 +91,10 @@ static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
+static unsigned int max_sev_es_asid;
+static unsigned int min_sev_es_asid;
+static unsigned int max_snp_asid;
+static unsigned int min_snp_asid;
static unsigned long sev_me_mask;
static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
@@ -116,6 +127,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
*/
down_write(&sev_deactivate_lock);
+ /* SNP firmware requires use of WBINVD for ASID recycling. */
wbinvd_on_all_cpus();
if (sev_snp_enabled)
@@ -140,11 +152,19 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP;
}
+static bool snp_is_secure_tsc_enabled(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+
+ return (sev->vmsa_features & SVM_SEV_FEAT_SECURE_TSC) &&
+ !WARN_ON_ONCE(!sev_snp_guest(kvm));
+}
+
/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
@@ -171,20 +191,34 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
misc_cg_uncharge(type, sev->misc_cg, 1);
}
-static int sev_asid_new(struct kvm_sev_info *sev)
+static int sev_asid_new(struct kvm_sev_info *sev, unsigned long vm_type)
{
/*
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- * Note: min ASID can end up larger than the max if basic SEV support is
- * effectively disabled by disallowing use of ASIDs for SEV guests.
*/
- unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
- unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
- unsigned int asid;
+ unsigned int min_asid, max_asid, asid;
bool retry = true;
int ret;
+ if (vm_type == KVM_X86_SNP_VM) {
+ min_asid = min_snp_asid;
+ max_asid = max_snp_asid;
+ } else if (sev->es_active) {
+ min_asid = min_sev_es_asid;
+ max_asid = max_sev_es_asid;
+ } else {
+ min_asid = min_sev_asid;
+ max_asid = max_sev_asid;
+ }
+
+ /*
+ * The min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ * Similarly for SEV-ES guests the min ASID can end up larger than the
+ * max when ciphertext hiding is enabled, effectively disabling SEV-ES
+ * support.
+ */
if (min_asid > max_asid)
return -ENOTTY;
@@ -226,9 +260,7 @@ e_uncharge:
static unsigned int sev_get_asid(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
- return sev->asid;
+ return to_kvm_sev_info(kvm)->asid;
}
static void sev_asid_free(struct kvm_sev_info *sev)
@@ -403,9 +435,10 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_init *data,
unsigned long vm_type)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_platform_init_args init_args = {0};
bool es_active = vm_type != KVM_X86_SEV_VM;
+ bool snp_active = vm_type == KVM_X86_SNP_VM;
u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0;
int ret;
@@ -415,12 +448,26 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
if (data->flags)
return -EINVAL;
+ if (!snp_active)
+ valid_vmsa_features &= ~SVM_SEV_FEAT_SECURE_TSC;
+
if (data->vmsa_features & ~valid_vmsa_features)
return -EINVAL;
if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version))
return -EINVAL;
+ /*
+ * KVM supports the full range of mandatory features defined by version
+ * 2 of the GHCB protocol, so default to that for SEV-ES guests created
+ * via KVM_SEV_INIT2 (KVM_SEV_INIT forces version 1).
+ */
+ if (es_active && !data->ghcb_version)
+ data->ghcb_version = 2;
+
+ if (snp_active && data->ghcb_version < 2)
+ return -EINVAL;
+
if (unlikely(sev->active))
return -EINVAL;
@@ -429,28 +476,25 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
sev->vmsa_features = data->vmsa_features;
sev->ghcb_version = data->ghcb_version;
- /*
- * Currently KVM supports the full range of mandatory features defined
- * by version 2 of the GHCB protocol, so default to that for SEV-ES
- * guests created via KVM_SEV_INIT2.
- */
- if (sev->es_active && !sev->ghcb_version)
- sev->ghcb_version = GHCB_VERSION_DEFAULT;
-
- if (vm_type == KVM_X86_SNP_VM)
+ if (snp_active)
sev->vmsa_features |= SVM_SEV_FEAT_SNP_ACTIVE;
- ret = sev_asid_new(sev);
+ ret = sev_asid_new(sev, vm_type);
if (ret)
goto e_no_asid;
init_args.probe = false;
ret = sev_platform_init(&init_args);
if (ret)
- goto e_free;
+ goto e_free_asid;
+
+ if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+ ret = -ENOMEM;
+ goto e_free_asid;
+ }
/* This needs to happen after SEV/SNP firmware initialization. */
- if (vm_type == KVM_X86_SNP_VM) {
+ if (snp_active) {
ret = snp_guest_req_init(kvm);
if (ret)
goto e_free;
@@ -465,6 +509,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
return 0;
e_free:
+ free_cpumask_var(sev->have_run_cpus);
+e_free_asid:
argp->error = init_args.error;
sev_asid_free(sev);
sev->asid = 0;
@@ -500,10 +546,9 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_init data;
- if (!sev->need_init)
+ if (!to_kvm_sev_info(kvm)->need_init)
return -EINVAL;
if (kvm->arch.vm_type != KVM_X86_SEV_VM &&
@@ -543,14 +588,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
return __sev_issue_cmd(sev->fd, id, data, error);
}
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_launch_start start;
struct kvm_sev_launch_start params;
void *dh_blob, *session_blob;
@@ -610,6 +655,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_session;
}
+ sev->policy = params.policy;
sev->handle = start.handle;
sev->fd = argp->sev_fd;
@@ -622,9 +668,9 @@ e_free_dh:
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long ulen, unsigned long *n,
- int write)
+ unsigned int flags)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
unsigned long npages, size;
int npinned;
unsigned long locked, lock_limit;
@@ -663,7 +709,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
return ERR_PTR(-ENOMEM);
/* Pin the user virtual address. */
- npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+ npinned = pin_user_pages_fast(uaddr, npages, flags, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
ret = -ENOMEM;
@@ -686,11 +732,9 @@ err:
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
unsigned long npages)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
unpin_user_pages(pages, npages);
kvfree(pages);
- sev->pages_locked -= npages;
+ to_kvm_sev_info(kvm)->pages_locked -= npages;
}
static void sev_clflush_pages(struct page *pages[], unsigned long npages)
@@ -710,6 +754,29 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
}
}
+static void sev_writeback_caches(struct kvm *kvm)
+{
+ /*
+ * Ensure that all dirty guest tagged cache entries are written back
+ * before releasing the pages back to the system for use. CLFLUSH will
+ * not do this without SME_COHERENT, and flushing many cache lines
+ * individually is slower than blasting WBINVD for large VMs, so issue
+ * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
+ * on CPUs that have done VMRUN, i.e. may have dirtied data using the
+ * VM's ASID.
+ *
+ * For simplicity, never remove CPUs from the bitmap. Ideally, KVM
+ * would clear the mask when flushing caches, but doing so requires
+ * serializing multiple calls and having responding CPUs (to the IPI)
+ * mark themselves as still running if they are running (or about to
+ * run) a vCPU for the VM.
+ *
+ * Note, the caller is responsible for ensuring correctness if the mask
+ * can be modified, e.g. if a CPU could be doing VMRUN.
+ */
+ wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
+}
+
static unsigned long get_num_contig_pages(unsigned long idx,
struct page **inpages, unsigned long npages)
{
@@ -734,7 +801,6 @@ static unsigned long get_num_contig_pages(unsigned long idx,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data data;
struct page **inpages;
@@ -751,7 +817,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
vaddr_end = vaddr + size;
/* Lock the user memory. */
- inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, FOLL_WRITE);
if (IS_ERR(inpages))
return PTR_ERR(inpages);
@@ -762,7 +828,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
sev_clflush_pages(inpages, npages);
data.reserved = 0;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
int offset, len;
@@ -802,7 +868,7 @@ e_unpin:
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
struct sev_es_save_area *save = svm->sev_es.vmsa;
struct xregs_state *xsave;
const u8 *s;
@@ -972,7 +1038,6 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *measure = u64_to_user_ptr(argp->data);
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_measure data;
struct kvm_sev_launch_measure params;
void __user *p = NULL;
@@ -1005,7 +1070,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
cmd:
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
/*
@@ -1033,19 +1098,17 @@ e_free_blob:
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
}
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_guest_status params;
struct sev_data_guest_status data;
int ret;
@@ -1055,7 +1118,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
memset(&data, 0, sizeof(data));
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
if (ret)
return ret;
@@ -1074,11 +1137,10 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
unsigned long dst, int size,
int *error, bool enc)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_dbg data;
data.reserved = 0;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
data.dst_addr = dst;
data.src_addr = src;
data.len = size;
@@ -1250,7 +1312,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (IS_ERR(src_p))
return PTR_ERR(src_p);
- dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, FOLL_WRITE);
if (IS_ERR(dst_p)) {
sev_unpin_memory(kvm, src_p, n);
return PTR_ERR(dst_p);
@@ -1302,7 +1364,6 @@ err:
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_secret data;
struct kvm_sev_launch_secret params;
struct page **pages;
@@ -1316,7 +1377,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
- pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, FOLL_WRITE);
if (IS_ERR(pages))
return PTR_ERR(pages);
@@ -1358,7 +1419,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.hdr_address = __psp_pa(hdr);
data.hdr_len = params.hdr_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);
kfree(hdr);
@@ -1378,7 +1439,6 @@ e_unpin_memory:
static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *report = u64_to_user_ptr(argp->data);
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_attestation_report data;
struct kvm_sev_attestation_report params;
void __user *p;
@@ -1411,7 +1471,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
}
cmd:
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
/*
* If we query the session length, FW responded with expected data.
@@ -1441,12 +1501,11 @@ static int
__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_send_start *params)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_start data;
int ret;
memset(&data, 0, sizeof(data));
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
params->session_len = data.session_len;
@@ -1459,7 +1518,6 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_start data;
struct kvm_sev_send_start params;
void *amd_certs, *session_data;
@@ -1520,7 +1578,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.amd_certs_len = params.amd_certs_len;
data.session_address = __psp_pa(session_data);
data.session_len = params.session_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
@@ -1552,12 +1610,11 @@ static int
__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_send_update_data *params)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_update_data data;
int ret;
memset(&data, 0, sizeof(data));
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
params->hdr_len = data.hdr_len;
@@ -1572,7 +1629,6 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_update_data data;
struct kvm_sev_send_update_data params;
void *hdr, *trans_data;
@@ -1608,11 +1664,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* allocate memory for header and transport buffer */
ret = -ENOMEM;
- hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+ hdr = kzalloc(params.hdr_len, GFP_KERNEL);
if (!hdr)
goto e_unpin;
- trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+ trans_data = kzalloc(params.trans_len, GFP_KERNEL);
if (!trans_data)
goto e_free_hdr;
@@ -1626,7 +1682,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
data.guest_len = params.guest_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
@@ -1657,31 +1713,29 @@ e_unpin:
static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
}
static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_cancel data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
}
static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_receive_start start;
struct kvm_sev_receive_start params;
int *error = &argp->error;
@@ -1755,7 +1809,6 @@ e_free_pdh:
static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_receive_update_data params;
struct sev_data_receive_update_data data;
void *hdr = NULL, *trans = NULL;
@@ -1798,7 +1851,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 1);
+ PAGE_SIZE, &n, FOLL_WRITE);
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
@@ -1815,7 +1868,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
data.guest_len = params.guest_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
&argp->error);
@@ -1832,13 +1885,12 @@ e_free_hdr:
static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_receive_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
@@ -1858,8 +1910,8 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
- struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
- struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_sev_info *dst_sev = to_kvm_sev_info(dst_kvm);
+ struct kvm_sev_info *src_sev = to_kvm_sev_info(src_kvm);
int r = -EBUSY;
if (dst_kvm == src_kvm)
@@ -1893,8 +1945,8 @@ release_dst:
static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
- struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
- struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_sev_info *dst_sev = to_kvm_sev_info(dst_kvm);
+ struct kvm_sev_info *src_sev = to_kvm_sev_info(src_kvm);
mutex_unlock(&dst_kvm->lock);
mutex_unlock(&src_kvm->lock);
@@ -1902,74 +1954,10 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
atomic_set_release(&src_sev->migration_in_progress, 0);
}
-/* vCPU mutex subclasses. */
-enum sev_migration_role {
- SEV_MIGRATION_SOURCE = 0,
- SEV_MIGRATION_TARGET,
- SEV_NR_MIGRATION_ROLES,
-};
-
-static int sev_lock_vcpus_for_migration(struct kvm *kvm,
- enum sev_migration_role role)
-{
- struct kvm_vcpu *vcpu;
- unsigned long i, j;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (mutex_lock_killable_nested(&vcpu->mutex, role))
- goto out_unlock;
-
-#ifdef CONFIG_PROVE_LOCKING
- if (!i)
- /*
- * Reset the role to one that avoids colliding with
- * the role used for the first vcpu mutex.
- */
- role = SEV_NR_MIGRATION_ROLES;
- else
- mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
-#endif
- }
-
- return 0;
-
-out_unlock:
-
- kvm_for_each_vcpu(j, vcpu, kvm) {
- if (i == j)
- break;
-
-#ifdef CONFIG_PROVE_LOCKING
- if (j)
- mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
-#endif
-
- mutex_unlock(&vcpu->mutex);
- }
- return -EINTR;
-}
-
-static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- unsigned long i;
- bool first = true;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (first)
- first = false;
- else
- mutex_acquire(&vcpu->mutex.dep_map,
- SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
-
- mutex_unlock(&vcpu->mutex);
- }
-}
-
static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
{
- struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
- struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm);
+ struct kvm_sev_info *src = to_kvm_sev_info(src_kvm);
struct kvm_vcpu *dst_vcpu, *src_vcpu;
struct vcpu_svm *dst_svm, *src_svm;
struct kvm_sev_info *mirror;
@@ -2009,8 +1997,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
* and add the new mirror to the list.
*/
if (is_mirroring_enc_context(dst_kvm)) {
- struct kvm_sev_info *owner_sev_info =
- &to_kvm_svm(dst->enc_context_owner)->sev_info;
+ struct kvm_sev_info *owner_sev_info = to_kvm_sev_info(dst->enc_context_owner);
list_del(&src->mirror_entry);
list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
@@ -2019,7 +2006,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
dst_svm = to_svm(dst_vcpu);
- sev_init_vmcb(dst_svm);
+ sev_init_vmcb(dst_svm, false);
if (!dst->es_active)
continue;
@@ -2053,6 +2040,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EBUSY;
+
if (!sev_es_guest(src))
return 0;
@@ -2069,7 +2060,7 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
- struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *dst_sev = to_kvm_sev_info(kvm);
struct kvm_sev_info *src_sev, *cg_cleanup_sev;
CLASS(fd, f)(source_fd);
struct kvm *source_kvm;
@@ -2093,7 +2084,7 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
goto out_unlock;
}
- src_sev = &to_kvm_svm(source_kvm)->sev_info;
+ src_sev = to_kvm_sev_info(source_kvm);
dst_sev->misc_cg = get_current_misc_cg();
cg_cleanup_sev = dst_sev;
@@ -2104,10 +2095,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
charged = true;
}
- ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
+ ret = kvm_lock_all_vcpus(kvm);
if (ret)
goto out_dst_cgroup;
- ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
+ ret = kvm_lock_all_vcpus(source_kvm);
if (ret)
goto out_dst_vcpu;
@@ -2115,15 +2106,26 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
if (ret)
goto out_source_vcpu;
+ /*
+ * Allocate a new have_run_cpus for the destination, i.e. don't copy
+ * the set of CPUs from the source. If a CPU was used to run a vCPU in
+ * the source VM but is never used for the destination VM, then the CPU
+ * can only have cached memory that was accessible to the source VM.
+ */
+ if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+ ret = -ENOMEM;
+ goto out_source_vcpu;
+ }
+
sev_migrate_from(kvm, source_kvm);
kvm_vm_dead(source_kvm);
cg_cleanup_sev = src_sev;
ret = 0;
out_source_vcpu:
- sev_unlock_vcpus_for_migration(source_kvm);
+ kvm_unlock_all_vcpus(source_kvm);
out_dst_vcpu:
- sev_unlock_vcpus_for_migration(kvm);
+ kvm_unlock_all_vcpus(kvm);
out_dst_cgroup:
/* Operates on the source on success, on the destination on failure. */
if (charged)
@@ -2145,6 +2147,10 @@ int sev_dev_get_attr(u32 group, u64 attr, u64 *val)
*val = sev_supported_vmsa_features;
return 0;
+ case KVM_X86_SNP_POLICY_BITS:
+ *val = snp_supported_policy_bits;
+ return 0;
+
default:
return -ENXIO;
}
@@ -2181,7 +2187,7 @@ static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int snp_bind_asid(struct kvm *kvm, int *error)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_activate data = {0};
data.gctx_paddr = __psp_pa(sev->snp_context);
@@ -2191,7 +2197,7 @@ static int snp_bind_asid(struct kvm *kvm, int *error)
static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_launch_start start = {0};
struct kvm_sev_snp_launch_start params;
int rc;
@@ -2209,16 +2215,19 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.flags)
return -EINVAL;
- if (params.policy & ~SNP_POLICY_MASK_VALID)
+ if (params.policy & ~snp_supported_policy_bits)
return -EINVAL;
/* Check for policy bits that must be set */
- if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO) ||
- !(params.policy & SNP_POLICY_MASK_SMT))
+ if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO))
return -EINVAL;
- if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
- return -EINVAL;
+ if (snp_is_secure_tsc_enabled(kvm)) {
+ if (WARN_ON_ONCE(!kvm->arch.default_tsc_khz))
+ return -EINVAL;
+
+ start.desired_tsc_khz = kvm->arch.default_tsc_khz;
+ }
sev->snp_context = snp_context_create(kvm, argp);
if (!sev->snp_context)
@@ -2226,6 +2235,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
start.gctx_paddr = __psp_pa(sev->snp_context);
start.policy = params.policy;
+
memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error);
if (rc) {
@@ -2234,6 +2244,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_context;
}
+ sev->policy = params.policy;
sev->fd = argp->sev_fd;
rc = snp_bind_asid(kvm, &argp->error);
if (rc) {
@@ -2260,7 +2271,7 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
void __user *src, int order, void *opaque)
{
struct sev_gmem_populate_args *sev_populate_args = opaque;
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
int n_private = 0, ret, i;
int npages = (1 << order);
gfn_t gfn;
@@ -2350,7 +2361,7 @@ err:
static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_gmem_populate_args sev_populate_args = {0};
struct kvm_sev_snp_launch_update params;
struct kvm_memory_slot *memslot;
@@ -2367,7 +2378,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
pr_debug("%s: GFN start 0x%llx length 0x%llx type %d flags %d\n", __func__,
params.gfn_start, params.len, params.type, params.flags);
- if (!PAGE_ALIGNED(params.len) || params.flags ||
+ if (!params.len || !PAGE_ALIGNED(params.len) || params.flags ||
(params.type != KVM_SEV_SNP_PAGE_TYPE_NORMAL &&
params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO &&
params.type != KVM_SEV_SNP_PAGE_TYPE_UNMEASURED &&
@@ -2399,7 +2410,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
mutex_lock(&kvm->slots_lock);
memslot = gfn_to_memslot(kvm, params.gfn_start);
- if (!kvm_slot_can_be_private(memslot)) {
+ if (!kvm_slot_has_gmem(memslot)) {
ret = -EINVAL;
goto out;
}
@@ -2434,7 +2445,7 @@ out:
static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_launch_update data = {};
struct kvm_vcpu *vcpu;
unsigned long i;
@@ -2482,7 +2493,7 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct kvm_sev_snp_launch_finish params;
struct sev_data_snp_launch_finish *data;
void *id_block = NULL, *id_auth = NULL;
@@ -2677,7 +2688,7 @@ out:
int sev_mem_enc_register_region(struct kvm *kvm,
struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct enc_region *region;
int ret = 0;
@@ -2696,7 +2707,8 @@ int sev_mem_enc_register_region(struct kvm *kvm,
return -ENOMEM;
mutex_lock(&kvm->lock);
- region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+ region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages,
+ FOLL_WRITE | FOLL_LONGTERM);
if (IS_ERR(region->pages)) {
ret = PTR_ERR(region->pages);
mutex_unlock(&kvm->lock);
@@ -2729,7 +2741,7 @@ e_free:
static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct list_head *head = &sev->regions_list;
struct enc_region *i;
@@ -2773,12 +2785,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
goto failed;
}
- /*
- * Ensure that all guest tagged cache entries are flushed before
- * releasing the pages back to the system for use. CLFLUSH will
- * not do this, so issue a WBINVD.
- */
- wbinvd_on_all_cpus();
+ sev_writeback_caches(kvm);
__unregister_enc_region_locked(kvm, region);
@@ -2820,13 +2827,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
goto e_unlock;
}
+ mirror_sev = to_kvm_sev_info(kvm);
+ if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+ ret = -ENOMEM;
+ goto e_unlock;
+ }
+
/*
* The mirror kvm holds an enc_context_owner ref so its asid can't
* disappear until we're done with it
*/
- source_sev = &to_kvm_svm(source_kvm)->sev_info;
+ source_sev = to_kvm_sev_info(source_kvm);
kvm_get_kvm(source_kvm);
- mirror_sev = &to_kvm_svm(kvm)->sev_info;
list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
/* Set enc_context_owner and copy its encryption context over */
@@ -2854,7 +2866,7 @@ e_unlock:
static int snp_decommission_context(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_addr data = {};
int ret;
@@ -2879,7 +2891,7 @@ static int snp_decommission_context(struct kvm *kvm)
void sev_vm_destroy(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
@@ -2888,7 +2900,13 @@ void sev_vm_destroy(struct kvm *kvm)
WARN_ON(!list_empty(&sev->mirror_vms));
- /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
+ free_cpumask_var(sev->have_run_cpus);
+
+ /*
+ * If this is a mirror VM, remove it from the owner's list of a mirrors
+ * and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
+ * Note, mirror VMs don't support registering encrypted regions.
+ */
if (is_mirroring_enc_context(kvm)) {
struct kvm *owner_kvm = sev->enc_context_owner;
@@ -2899,12 +2917,6 @@ void sev_vm_destroy(struct kvm *kvm)
return;
}
- /*
- * Ensure that all guest tagged cache entries are flushed before
- * releasing the pages back to the system for use. CLFLUSH will
- * not do this, so issue a WBINVD.
- */
- wbinvd_on_all_cpus();
/*
* if userspace was terminated before unregistering the memory regions
@@ -2950,9 +2962,37 @@ void __init sev_set_cpu_caps(void)
}
}
+static bool is_sev_snp_initialized(void)
+{
+ struct sev_user_data_snp_status *status;
+ struct sev_data_snp_addr buf;
+ bool initialized = false;
+ int ret, error = 0;
+
+ status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO);
+ if (!status)
+ return false;
+
+ buf.address = __psp_pa(status);
+ ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error);
+ if (ret) {
+ pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n",
+ ret, error, error);
+ goto out;
+ }
+
+ initialized = !!status->state;
+
+out:
+ snp_free_firmware_page(status);
+
+ return initialized;
+}
+
void __init sev_hardware_setup(void)
{
unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
+ struct sev_platform_init_args init_args = {0};
bool sev_snp_supported = false;
bool sev_es_supported = false;
bool sev_supported = false;
@@ -2972,6 +3012,16 @@ void __init sev_hardware_setup(void)
WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID)))
goto out;
+ /*
+ * The kernel's initcall infrastructure lacks the ability to express
+ * dependencies between initcalls, whereas the modules infrastructure
+ * automatically handles dependencies via symbol loading. Ensure the
+ * PSP SEV driver is initialized before proceeding if KVM is built-in,
+ * as the dependency isn't handled by the initcall infrastructure.
+ */
+ if (IS_BUILTIN(CONFIG_KVM_AMD) && sev_module_init())
+ goto out;
+
/* Retrieve SEV CPUID information */
cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
@@ -3037,12 +3087,48 @@ void __init sev_hardware_setup(void)
if (min_sev_asid == 1)
goto out;
+ min_sev_es_asid = min_snp_asid = 1;
+ max_sev_es_asid = max_snp_asid = min_sev_asid - 1;
+
sev_es_asid_count = min_sev_asid - 1;
WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count));
sev_es_supported = true;
sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP);
out:
+ if (sev_enabled) {
+ init_args.probe = true;
+
+ if (sev_is_snp_ciphertext_hiding_supported())
+ init_args.max_snp_asid = min(nr_ciphertext_hiding_asids,
+ min_sev_asid - 1);
+
+ if (sev_platform_init(&init_args))
+ sev_supported = sev_es_supported = sev_snp_supported = false;
+ else if (sev_snp_supported)
+ sev_snp_supported = is_sev_snp_initialized();
+
+ if (sev_snp_supported) {
+ snp_supported_policy_bits = sev_get_snp_policy_bits() &
+ KVM_SNP_POLICY_MASK_VALID;
+ nr_ciphertext_hiding_asids = init_args.max_snp_asid;
+ }
+
+ /*
+ * If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP
+ * ASID range is partitioned into separate SEV-ES and SEV-SNP
+ * ASID ranges, with the SEV-SNP range being [1..max_snp_asid]
+ * and the SEV-ES range being (max_snp_asid..max_sev_es_asid].
+ * Note, SEV-ES may effectively be disabled if all ASIDs from
+ * the joint range are assigned to SEV-SNP.
+ */
+ if (nr_ciphertext_hiding_asids) {
+ max_snp_asid = nr_ciphertext_hiding_asids;
+ min_sev_es_asid = max_snp_asid + 1;
+ pr_info("SEV-SNP ciphertext hiding enabled\n");
+ }
+ }
+
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
@@ -3051,12 +3137,14 @@ out:
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
- sev_es_supported ? "enabled" : "disabled",
- min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1);
+ sev_es_supported ? min_sev_es_asid <= max_sev_es_asid ? "enabled" :
+ "unusable" :
+ "disabled",
+ min_sev_es_asid, max_sev_es_asid);
if (boot_cpu_has(X86_FEATURE_SEV_SNP))
pr_info("SEV-SNP %s (ASIDs %u - %u)\n",
- sev_snp_supported ? "enabled" : "disabled",
- min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1);
+ str_enabled_disabled(sev_snp_supported),
+ min_snp_asid, max_snp_asid);
sev_enabled = sev_supported;
sev_es_enabled = sev_es_supported;
@@ -3069,6 +3157,9 @@ out:
sev_supported_vmsa_features = 0;
if (sev_es_debug_swap_enabled)
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
+ if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC))
+ sev_supported_vmsa_features |= SVM_SEV_FEAT_SECURE_TSC;
}
void sev_hardware_unsetup(void)
@@ -3084,6 +3175,8 @@ void sev_hardware_unsetup(void)
misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
+
+ sev_platform_shutdown();
}
int sev_cpu_init(struct svm_cpu_data *sd)
@@ -3126,30 +3219,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
/*
* VM Page Flush takes a host virtual address and a guest ASID. Fall
- * back to WBINVD if this faults so as not to make any problems worse
- * by leaving stale encrypted data in the cache.
+ * back to full writeback of caches if this faults so as not to make
+ * any problems worse by leaving stale encrypted data in the cache.
*/
- if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
- goto do_wbinvd;
+ if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+ goto do_sev_writeback_caches;
return;
-do_wbinvd:
- wbinvd_on_all_cpus();
+do_sev_writeback_caches:
+ sev_writeback_caches(vcpu->kvm);
}
void sev_guest_memory_reclaimed(struct kvm *kvm)
{
/*
* With SNP+gmem, private/encrypted memory is unreachable via the
- * hva-based mmu notifiers, so these events are only actually
- * pertaining to shared pages where there is no need to perform
- * the WBINVD to flush associated caches.
+ * hva-based mmu notifiers, i.e. these events are explicitly scoped to
+ * shared pages, where there's no need to flush caches.
*/
if (!sev_guest(kvm) || sev_snp_guest(kvm))
return;
- wbinvd_on_all_cpus();
+ sev_writeback_caches(kvm);
}
void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3183,9 +3275,14 @@ skip_vmsa_free:
kvfree(svm->sev_es.ghcb_sa);
}
+static u64 kvm_get_cached_sw_exit_code(struct vmcb_control_area *control)
+{
+ return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
static void dump_ghcb(struct vcpu_svm *svm)
{
- struct ghcb *ghcb = svm->sev_es.ghcb;
+ struct vmcb_control_area *control = &svm->vmcb->control;
unsigned int nbits;
/* Re-use the dump_invalid_vmcb module parameter */
@@ -3194,18 +3291,24 @@ static void dump_ghcb(struct vcpu_svm *svm)
return;
}
- nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+ nbits = sizeof(svm->sev_es.valid_bitmap) * 8;
- pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+ /*
+ * Print KVM's snapshot of the GHCB values that were (unsuccessfully)
+ * used to handle the exit. If the guest has since modified the GHCB
+ * itself, dumping the raw GHCB won't help debug why KVM was unable to
+ * handle the VMGEXIT that KVM observed.
+ */
+ pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
- ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+ kvm_get_cached_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
- ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+ control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
- ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+ control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
- ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
- pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+ svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm));
+ pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap);
}
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
@@ -3251,36 +3354,32 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap));
memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap));
- vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb);
+ vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm);
- svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb);
+ svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm);
- if (kvm_ghcb_xcr0_is_valid(svm)) {
- vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
- kvm_update_cpuid_runtime(vcpu);
- }
+ if (kvm_ghcb_xcr0_is_valid(svm))
+ __kvm_set_xcr(vcpu, 0, kvm_ghcb_get_xcr0(svm));
+
+ if (kvm_ghcb_xss_is_valid(svm))
+ __kvm_emulate_msr_write(vcpu, MSR_IA32_XSS, kvm_ghcb_get_xss(svm));
/* Copy the GHCB exit information into the VMCB fields */
- exit_code = ghcb_get_sw_exit_code(ghcb);
+ exit_code = kvm_ghcb_get_sw_exit_code(svm);
control->exit_code = lower_32_bits(exit_code);
control->exit_code_hi = upper_32_bits(exit_code);
- control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
- control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
- svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);
+ control->exit_info_1 = kvm_ghcb_get_sw_exit_info_1(svm);
+ control->exit_info_2 = kvm_ghcb_get_sw_exit_info_2(svm);
+ svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm);
/* Clear the valid entries fields */
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
-static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
-{
- return (((u64)control->exit_code_hi) << 32) | control->exit_code;
-}
-
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3292,7 +3391,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
* Retrieve the exit code now even though it may not be marked valid
* as it could help with debugging.
*/
- exit_code = kvm_ghcb_get_sw_exit_code(control);
+ exit_code = kvm_get_cached_sw_exit_code(control);
/* Only GHCB Usage code 0 is supported */
if (svm->sev_es.ghcb->ghcb_usage) {
@@ -3420,8 +3519,7 @@ vmgexit_err:
dump_ghcb(svm);
}
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason);
+ svm_vmgexit_bad_input(svm, reason);
/* Resume the guest to "return" the error code. */
return 1;
@@ -3462,10 +3560,28 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
svm->sev_es.ghcb = NULL;
}
-void pre_sev_run(struct vcpu_svm *svm, int cpu)
+int pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- unsigned int asid = sev_get_asid(svm->vcpu.kvm);
+ struct kvm *kvm = svm->vcpu.kvm;
+ unsigned int asid = sev_get_asid(kvm);
+
+ /*
+ * Reject KVM_RUN if userspace attempts to run the vCPU with an invalid
+ * VMSA, e.g. if userspace forces the vCPU to be RUNNABLE after an SNP
+ * AP Destroy event.
+ */
+ if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
+ return -EINVAL;
+
+ /*
+ * To optimize cache flushes when memory is reclaimed from an SEV VM,
+ * track physical CPUs that enter the guest for SEV VMs and thus can
+ * have encrypted, dirty data in the cache, and flush caches only for
+ * CPUs that have entered the guest.
+ */
+ if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
+ cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
@@ -3478,11 +3594,12 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
*/
if (sd->sev_vmcbs[asid] == svm->vmcb &&
svm->vcpu.arch.last_vmentry_cpu == cpu)
- return;
+ return 0;
sd->sev_vmcbs[asid] = svm->vmcb;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+ return 0;
}
#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
@@ -3564,8 +3681,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return 0;
e_scratch:
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_SCRATCH_AREA);
return 1;
}
@@ -3627,13 +3743,20 @@ static int snp_begin_psc_msr(struct vcpu_svm *svm, u64 ghcb_msr)
return 1; /* resume guest */
}
- if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) {
+ if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
set_ghcb_msr(svm, GHCB_MSR_PSC_RESP_ERROR);
return 1; /* resume guest */
}
vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
+ /*
+ * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2)
+ * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that
+ * it was always zero on KVM_EXIT_HYPERCALL. Since KVM is now overwriting
+ * vcpu->run->hypercall.ret, ensuring that it is zero to not break QEMU.
+ */
+ vcpu->run->hypercall.ret = 0;
vcpu->run->hypercall.args[0] = gpa;
vcpu->run->hypercall.args[1] = 1;
vcpu->run->hypercall.args[2] = (op == SNP_PAGE_STATE_PRIVATE)
@@ -3658,7 +3781,14 @@ static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
svm->sev_es.psc_inflight = 0;
svm->sev_es.psc_idx = 0;
svm->sev_es.psc_2m = false;
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret);
+
+ /*
+ * PSC requests always get a "no action" response in SW_EXITINFO1, with
+ * a PSC-specific return code in SW_EXITINFO2 that provides the "real"
+ * return code. E.g. if the PSC request was interrupted, the need to
+ * retry is communicated via SW_EXITINFO2, not SW_EXITINFO1.
+ */
+ svm_vmgexit_no_action(svm, psc_ret);
}
static void __snp_complete_one_psc(struct vcpu_svm *svm)
@@ -3710,7 +3840,7 @@ static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc)
bool huge;
u64 gfn;
- if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) {
+ if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
return 1;
}
@@ -3797,6 +3927,13 @@ next_range:
case VMGEXIT_PSC_OP_SHARED:
vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
+ /*
+ * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2)
+ * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that
+ * it was always zero on KVM_EXIT_HYPERCALL. Since KVM is now overwriting
+ * vcpu->run->hypercall.ret, ensuring that it is zero to not break QEMU.
+ */
+ vcpu->run->hypercall.ret = 0;
vcpu->run->hypercall.args[0] = gfn_to_gpa(gfn);
vcpu->run->hypercall.args[1] = npages;
vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE
@@ -3820,113 +3957,90 @@ next_range:
goto next_range;
}
- unreachable();
+ BUG();
}
-static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
+/*
+ * Invoked as part of svm_vcpu_reset() processing of an init event.
+ */
+static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_memory_slot *slot;
+ struct page *page;
+ kvm_pfn_t pfn;
+ gfn_t gfn;
+
+ guard(mutex)(&svm->sev_es.snp_vmsa_mutex);
+
+ if (!svm->sev_es.snp_ap_waiting_for_reset)
+ return;
- WARN_ON(!mutex_is_locked(&svm->sev_es.snp_vmsa_mutex));
+ svm->sev_es.snp_ap_waiting_for_reset = false;
/* Mark the vCPU as offline and not runnable */
vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);
/* Clear use of the VMSA */
svm->vmcb->control.vmsa_pa = INVALID_PAGE;
- if (VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) {
- gfn_t gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa);
- struct kvm_memory_slot *slot;
- struct page *page;
- kvm_pfn_t pfn;
-
- slot = gfn_to_memslot(vcpu->kvm, gfn);
- if (!slot)
- return -EINVAL;
-
- /*
- * The new VMSA will be private memory guest memory, so
- * retrieve the PFN from the gmem backend.
- */
- if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL))
- return -EINVAL;
-
- /*
- * From this point forward, the VMSA will always be a
- * guest-mapped page rather than the initial one allocated
- * by KVM in svm->sev_es.vmsa. In theory, svm->sev_es.vmsa
- * could be free'd and cleaned up here, but that involves
- * cleanups like wbinvd_on_all_cpus() which would ideally
- * be handled during teardown rather than guest boot.
- * Deferring that also allows the existing logic for SEV-ES
- * VMSAs to be re-used with minimal SNP-specific changes.
- */
- svm->sev_es.snp_has_guest_vmsa = true;
-
- /* Use the new VMSA */
- svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
-
- /* Mark the vCPU as runnable */
- vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
- svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
-
- /*
- * gmem pages aren't currently migratable, but if this ever
- * changes then care should be taken to ensure
- * svm->sev_es.vmsa is pinned through some other means.
- */
- kvm_release_page_clean(page);
- }
-
/*
* When replacing the VMSA during SEV-SNP AP creation,
* mark the VMCB dirty so that full state is always reloaded.
*/
vmcb_mark_all_dirty(svm->vmcb);
- return 0;
-}
+ if (!VALID_PAGE(svm->sev_es.snp_vmsa_gpa))
+ return;
-/*
- * Invoked as part of svm_vcpu_reset() processing of an init event.
- */
-void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- int ret;
+ gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa);
+ svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
- if (!sev_snp_guest(vcpu->kvm))
+ slot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!slot)
return;
- mutex_lock(&svm->sev_es.snp_vmsa_mutex);
+ /*
+ * The new VMSA will be private memory guest memory, so retrieve the
+ * PFN from the gmem backend.
+ */
+ if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL))
+ return;
- if (!svm->sev_es.snp_ap_waiting_for_reset)
- goto unlock;
+ /*
+ * From this point forward, the VMSA will always be a guest-mapped page
+ * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
+ * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
+ * that involves cleanups like flushing caches, which would ideally be
+ * handled during teardown rather than guest boot. Deferring that also
+ * allows the existing logic for SEV-ES VMSAs to be re-used with
+ * minimal SNP-specific changes.
+ */
+ svm->sev_es.snp_has_guest_vmsa = true;
- svm->sev_es.snp_ap_waiting_for_reset = false;
+ /* Use the new VMSA */
+ svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
- ret = __sev_snp_update_protected_guest_state(vcpu);
- if (ret)
- vcpu_unimpl(vcpu, "snp: AP state update on init failed\n");
+ /* Mark the vCPU as runnable */
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
-unlock:
- mutex_unlock(&svm->sev_es.snp_vmsa_mutex);
+ /*
+ * gmem pages aren't currently migratable, but if this ever changes
+ * then care should be taken to ensure svm->sev_es.vmsa is pinned
+ * through some other means.
+ */
+ kvm_release_page_clean(page);
}
static int sev_snp_ap_creation(struct vcpu_svm *svm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_vcpu *target_vcpu;
struct vcpu_svm *target_svm;
unsigned int request;
unsigned int apic_id;
- bool kick;
- int ret;
request = lower_32_bits(svm->vmcb->control.exit_info_1);
apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
@@ -3939,47 +4053,23 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
return -EINVAL;
}
- ret = 0;
-
target_svm = to_svm(target_vcpu);
- /*
- * The target vCPU is valid, so the vCPU will be kicked unless the
- * request is for CREATE_ON_INIT. For any errors at this stage, the
- * kick will place the vCPU in an non-runnable state.
- */
- kick = true;
-
- mutex_lock(&target_svm->sev_es.snp_vmsa_mutex);
-
- target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
- target_svm->sev_es.snp_ap_waiting_for_reset = true;
-
- /* Interrupt injection mode shouldn't change for AP creation */
- if (request < SVM_VMGEXIT_AP_DESTROY) {
- u64 sev_features;
-
- sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
- sev_features ^= sev->vmsa_features;
-
- if (sev_features & SVM_SEV_FEAT_INT_INJ_MODES) {
- vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#lx] from guest\n",
- vcpu->arch.regs[VCPU_REGS_RAX]);
- ret = -EINVAL;
- goto out;
- }
- }
+ guard(mutex)(&target_svm->sev_es.snp_vmsa_mutex);
switch (request) {
case SVM_VMGEXIT_AP_CREATE_ON_INIT:
- kick = false;
- fallthrough;
case SVM_VMGEXIT_AP_CREATE:
+ if (vcpu->arch.regs[VCPU_REGS_RAX] != sev->vmsa_features) {
+ vcpu_unimpl(vcpu, "vmgexit: mismatched AP sev_features [%#lx] != [%#llx] from guest\n",
+ vcpu->arch.regs[VCPU_REGS_RAX], sev->vmsa_features);
+ return -EINVAL;
+ }
+
if (!page_address_valid(vcpu, svm->vmcb->control.exit_info_2)) {
vcpu_unimpl(vcpu, "vmgexit: invalid AP VMSA address [%#llx] from guest\n",
svm->vmcb->control.exit_info_2);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
/*
@@ -3993,30 +4083,30 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
vcpu_unimpl(vcpu,
"vmgexit: AP VMSA address [%llx] from guest is unsafe as it is 2M aligned\n",
svm->vmcb->control.exit_info_2);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
break;
case SVM_VMGEXIT_AP_DESTROY:
+ target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
break;
default:
vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n",
request);
- ret = -EINVAL;
- break;
+ return -EINVAL;
}
-out:
- if (kick) {
- kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
- kvm_vcpu_kick(target_vcpu);
- }
+ target_svm->sev_es.snp_ap_waiting_for_reset = true;
- mutex_unlock(&target_svm->sev_es.snp_vmsa_mutex);
+ /*
+ * Unless Creation is deferred until INIT, signal the vCPU to update
+ * its state.
+ */
+ if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT)
+ kvm_make_request_and_kick(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
- return ret;
+ return 0;
}
static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
@@ -4055,7 +4145,8 @@ static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_
goto out_unlock;
}
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(0, fw_err));
+ /* No action is requested *from KVM* if there was a firmware error. */
+ svm_vmgexit_no_action(svm, SNP_GUEST_ERR(0, fw_err));
ret = 1; /* resume guest */
@@ -4111,8 +4202,7 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
return snp_handle_guest_req(svm, req_gpa, resp_gpa);
request_invalid:
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
return 1; /* resume guest */
}
@@ -4120,7 +4210,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
u64 ghcb_info;
int ret = 1;
@@ -4304,10 +4394,9 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
if (ret)
return ret;
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0);
+ svm_vmgexit_success(svm, 0);
- exit_code = kvm_ghcb_get_sw_exit_code(control);
+ exit_code = kvm_get_cached_sw_exit_code(control);
switch (exit_code) {
case SVM_VMGEXIT_MMIO_READ:
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
@@ -4340,7 +4429,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_emulate_ap_reset_hold(vcpu);
break;
case SVM_VMGEXIT_AP_JUMP_TABLE: {
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
switch (control->exit_info_1) {
case 0:
@@ -4349,21 +4438,19 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
break;
case 1:
/* Get AP jump table address */
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table);
+ svm_vmgexit_success(svm, sev->ap_jump_table);
break;
default:
pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
control->exit_info_1);
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
}
ret = 1;
break;
}
case SVM_VMGEXIT_HV_FEATURES:
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_HV_FT_SUPPORTED);
-
+ svm_vmgexit_success(svm, GHCB_HV_FT_SUPPORTED);
ret = 1;
break;
case SVM_VMGEXIT_TERM_REQUEST:
@@ -4384,8 +4471,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
case SVM_VMGEXIT_AP_CREATION:
ret = sev_snp_ap_creation(svm);
if (ret) {
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
}
ret = 1;
@@ -4430,35 +4516,36 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
count, in);
}
-static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
+void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ /* Clear intercepts on MSRs that are context switched by hardware. */
+ svm_disable_intercept_for_msr(vcpu, MSR_AMD64_SEV_ES_GHCB, MSR_TYPE_RW);
+ svm_disable_intercept_for_msr(vcpu, MSR_EFER, MSR_TYPE_RW);
+ svm_disable_intercept_for_msr(vcpu, MSR_IA32_CR_PAT, MSR_TYPE_RW);
- if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
- bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
- guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX))
+ svm_set_intercept_for_msr(vcpu, MSR_TSC_AUX, MSR_TYPE_RW,
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID));
- set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
- }
+ svm_set_intercept_for_msr(vcpu, MSR_AMD64_GUEST_TSC_FREQ, MSR_TYPE_R,
+ !snp_is_secure_tsc_enabled(vcpu->kvm));
/*
* For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
* the host/guest supports its use.
*
- * guest_can_use() checks a number of requirements on the host/guest to
- * ensure that MSR_IA32_XSS is available, but it might report true even
- * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host
- * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better
- * to further check that the guest CPUID actually supports
- * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved
- * guests will still get intercepted and caught in the normal
- * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths.
+ * KVM treats the guest as being capable of using XSAVES even if XSAVES
+ * isn't enabled in guest CPUID as there is no intercept for XSAVES,
+ * i.e. the guest can use XSAVES/XRSTOR to read/write XSS if XSAVE is
+ * exposed to the guest and XSAVES is supported in hardware. Condition
+ * full XSS passthrough on the guest being able to use XSAVES *and*
+ * XSAVES being exposed to the guest so that KVM can at least honor
+ * guest CPUID for RDMSR and WRMSR.
*/
- if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
- else
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_XSS, MSR_TYPE_RW,
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) ||
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES));
}
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
@@ -4470,15 +4557,12 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
if (best)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
-
- if (sev_es_guest(svm->vcpu.kvm))
- sev_es_vcpu_after_set_cpuid(svm);
}
-static void sev_es_init_vmcb(struct vcpu_svm *svm)
+static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event)
{
+ struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
struct vmcb *vmcb = svm->vmcb01.ptr;
- struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -4489,8 +4573,16 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
- if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ if (!svm->sev_es.snp_has_guest_vmsa) {
+ if (svm->sev_es.vmsa)
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ else
+ svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
+ svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
+ VMCB_ALLOWED_SEV_FEATURES_VALID;
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -4529,13 +4621,20 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
svm_clr_intercept(svm, INTERCEPT_XSETBV);
- /* Clear intercepts on selected MSRs */
- set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
+ /*
+ * Set the GHCB MSR value as per the GHCB specification when emulating
+ * vCPU RESET for an SEV-ES guest.
+ */
+ if (!init_event)
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
+ GHCB_VERSION_MIN,
+ sev_enc_bit));
}
-void sev_init_vmcb(struct vcpu_svm *svm)
+void sev_init_vmcb(struct vcpu_svm *svm, bool init_event)
{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
@@ -4545,28 +4644,42 @@ void sev_init_vmcb(struct vcpu_svm *svm)
*/
clr_exception_intercept(svm, GP_VECTOR);
- if (sev_es_guest(svm->vcpu.kvm))
- sev_es_init_vmcb(svm);
+ if (init_event && sev_snp_guest(vcpu->kvm))
+ sev_snp_init_protected_guest_state(vcpu);
+
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_init_vmcb(svm, init_event);
}
-void sev_es_vcpu_reset(struct vcpu_svm *svm)
+int sev_vcpu_create(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct page *vmsa_page;
+
+ mutex_init(&svm->sev_es.snp_vmsa_mutex);
+
+ if (!sev_es_guest(vcpu->kvm))
+ return 0;
/*
- * Set the GHCB MSR value as per the GHCB specification when emulating
- * vCPU RESET for an SEV-ES guest.
+ * SEV-ES guests require a separate (from the VMCB) VMSA page used to
+ * contain the encrypted register state of the guest.
*/
- set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
- GHCB_VERSION_MIN,
- sev_enc_bit));
+ vmsa_page = snp_safe_alloc_page();
+ if (!vmsa_page)
+ return -ENOMEM;
- mutex_init(&svm->sev_es.snp_vmsa_mutex);
+ svm->sev_es.vmsa = page_address(vmsa_page);
+
+ vcpu->arch.guest_tsc_protected = snp_is_secure_tsc_enabled(vcpu->kvm);
+
+ return 0;
}
void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
{
+ struct kvm *kvm = svm->vcpu.kvm;
+
/*
* All host state for SEV-ES guests is categorized into three swap types
* based on how it is handled by hardware during a world switch:
@@ -4590,19 +4703,37 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are
/*
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
- * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
- * saves and loads debug registers (Type-A).
+ * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does
+ * not save or load debug registers. Sadly, KVM can't prevent SNP
+ * guests from lying about DebugSwap on secondary vCPUs, i.e. the
+ * SEV_FEATURES provided at "AP Create" isn't guaranteed to match what
+ * the guest has actually enabled (or not!) in the VMSA.
+ *
+ * If DebugSwap is *possible*, save the masks so that they're restored
+ * if the guest enables DebugSwap. But for the DRs themselves, do NOT
+ * rely on the CPU to restore the host values; KVM will restore them as
+ * needed in common code, via hw_breakpoint_restore(). Note, KVM does
+ * NOT support virtualizing Breakpoint Extensions, i.e. the mask MSRs
+ * don't need to be restored per se, KVM just needs to ensure they are
+ * loaded with the correct values *if* the CPU writes the MSRs.
*/
- if (sev_vcpu_has_debug_swap(svm)) {
- hostsa->dr0 = native_get_debugreg(0);
- hostsa->dr1 = native_get_debugreg(1);
- hostsa->dr2 = native_get_debugreg(2);
- hostsa->dr3 = native_get_debugreg(3);
+ if (sev_vcpu_has_debug_swap(svm) ||
+ (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) {
hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
}
+
+ /*
+ * TSC_AUX is always virtualized for SEV-ES guests when the feature is
+ * available, i.e. TSC_AUX is loaded on #VMEXIT from the host save area.
+ * Set the save area to the current hardware value, i.e. the current
+ * user return value, so that the correct value is restored on #VMEXIT.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_V_TSC_AUX) &&
+ !WARN_ON_ONCE(tsc_aux_uret_slot < 0))
+ hostsa->tsc_aux = kvm_get_user_return_msr(tsc_aux_uret_slot);
}
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@ -4622,7 +4753,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* Return from an AP Reset Hold VMGEXIT, where the guest will
* set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
*/
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
+ svm_vmgexit_success(svm, 1);
break;
case AP_RESET_HOLD_MSR_PROTO:
/*
@@ -4700,7 +4831,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
}
slot = gfn_to_memslot(kvm, gfn);
- if (!kvm_slot_can_be_private(slot)) {
+ if (!kvm_slot_has_gmem(slot)) {
pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n",
gpa);
return;
@@ -4820,7 +4951,7 @@ static bool is_large_rmp_possible(struct kvm *kvm, kvm_pfn_t pfn, int order)
int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
kvm_pfn_t pfn_aligned;
gfn_t gfn_aligned;
int level, rc;
@@ -4910,7 +5041,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
/*
* SEV-ES avoids host/guest cache coherency issues through
- * WBINVD hooks issued via MMU notifiers during run-time, and
+ * WBNOINVD hooks issued via MMU notifiers during run-time, and
* KVM's VM destroy path at shutdown. Those MMU notifier events
* don't cover gmem since there is no requirement to map pages
* to a HVA in order to use them for a running guest. While the
@@ -4928,7 +5059,7 @@ next_pfn:
}
}
-int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
+int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
{
int level, rc;
bool assigned;
@@ -4942,3 +5073,97 @@ int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
return level;
}
+
+struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_save_area *vmsa;
+ struct kvm_sev_info *sev;
+ int error = 0;
+ int ret;
+
+ if (!sev_es_guest(vcpu->kvm))
+ return NULL;
+
+ /*
+ * If the VMSA has not yet been encrypted, return a pointer to the
+ * current un-encrypted VMSA.
+ */
+ if (!vcpu->arch.guest_state_protected)
+ return (struct vmcb_save_area *)svm->sev_es.vmsa;
+
+ sev = to_kvm_sev_info(vcpu->kvm);
+
+ /* Check if the SEV policy allows debugging */
+ if (sev_snp_guest(vcpu->kvm)) {
+ if (!(sev->policy & SNP_POLICY_MASK_DEBUG))
+ return NULL;
+ } else {
+ if (sev->policy & SEV_POLICY_MASK_NODBG)
+ return NULL;
+ }
+
+ if (sev_snp_guest(vcpu->kvm)) {
+ struct sev_data_snp_dbg dbg = {0};
+
+ vmsa = snp_alloc_firmware_page(__GFP_ZERO);
+ if (!vmsa)
+ return NULL;
+
+ dbg.gctx_paddr = __psp_pa(sev->snp_context);
+ dbg.src_addr = svm->vmcb->control.vmsa_pa;
+ dbg.dst_addr = __psp_pa(vmsa);
+
+ ret = sev_do_cmd(SEV_CMD_SNP_DBG_DECRYPT, &dbg, &error);
+
+ /*
+ * Return the target page to a hypervisor page no matter what.
+ * If this fails, the page can't be used, so leak it and don't
+ * try to use it.
+ */
+ if (snp_page_reclaim(vcpu->kvm, PHYS_PFN(__pa(vmsa))))
+ return NULL;
+
+ if (ret) {
+ pr_err("SEV: SNP_DBG_DECRYPT failed ret=%d, fw_error=%d (%#x)\n",
+ ret, error, error);
+ free_page((unsigned long)vmsa);
+
+ return NULL;
+ }
+ } else {
+ struct sev_data_dbg dbg = {0};
+ struct page *vmsa_page;
+
+ vmsa_page = alloc_page(GFP_KERNEL);
+ if (!vmsa_page)
+ return NULL;
+
+ vmsa = page_address(vmsa_page);
+
+ dbg.handle = sev->handle;
+ dbg.src_addr = svm->vmcb->control.vmsa_pa;
+ dbg.dst_addr = __psp_pa(vmsa);
+ dbg.len = PAGE_SIZE;
+
+ ret = sev_do_cmd(SEV_CMD_DBG_DECRYPT, &dbg, &error);
+ if (ret) {
+ pr_err("SEV: SEV_CMD_DBG_DECRYPT failed ret=%d, fw_error=%d (0x%x)\n",
+ ret, error, error);
+ __free_page(vmsa_page);
+
+ return NULL;
+ }
+ }
+
+ return vmsa;
+}
+
+void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa)
+{
+ /* If the VMSA has not yet been encrypted, nothing was allocated */
+ if (!vcpu->arch.guest_state_protected || !vmsa)
+ return;
+
+ free_page((unsigned long)vmsa);
+}