summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--arch/x86/kvm/svm/nested.c62
-rw-r--r--arch/x86/kvm/svm/sev.c127
-rw-r--r--arch/x86/kvm/svm/svm.c315
-rw-r--r--arch/x86/kvm/svm/svm.h22
-rw-r--r--arch/x86/kvm/svm/svm_ops.h4
5 files changed, 349 insertions, 181 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 2545d0c61985..f8b7bc04b3e7 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -238,6 +238,18 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}
+static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
+{
+ /* Nested FLUSHBYASID is not supported yet. */
+ switch(tlb_ctl) {
+ case TLB_CONTROL_DO_NOTHING:
+ case TLB_CONTROL_FLUSH_ALL_ASID:
+ return true;
+ default:
+ return false;
+ }
+}
+
static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
struct vmcb_control_area *control)
{
@@ -257,6 +269,9 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
IOPM_SIZE)))
return false;
+ if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
+ return false;
+
return true;
}
@@ -538,22 +553,27 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
if (nested_npt_enabled(svm))
nested_svm_init_mmu_context(vcpu);
- svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset =
- vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
+ vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
+ vcpu->arch.l1_tsc_offset,
+ svm->nested.ctl.tsc_offset,
+ svm->tsc_ratio_msr);
+
+ svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
+
+ if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
+ WARN_ON(!svm->tsc_scaling_enabled);
+ nested_svm_update_tsc_ratio_msr(vcpu);
+ }
svm->vmcb->control.int_ctl =
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
(svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
- svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
svm->vmcb->control.int_state = svm->nested.ctl.int_state;
svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
- svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count;
- svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;
-
nested_svm_transition_tlb_flush(vcpu);
/* Enter Guest-Mode */
@@ -579,7 +599,7 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
}
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
- struct vmcb *vmcb12)
+ struct vmcb *vmcb12, bool from_vmrun)
{
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
@@ -609,13 +629,16 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
- nested_npt_enabled(svm), true);
+ nested_npt_enabled(svm), from_vmrun);
if (ret)
return ret;
if (!npt_enabled)
vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
+ if (!from_vmrun)
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
svm_set_gif(svm, true);
return 0;
@@ -681,7 +704,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
svm->nested.nested_run_pending = 1;
- if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
+ if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
goto out_exit_err;
if (nested_svm_vmrun_msrpm(svm))
@@ -808,11 +831,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
- vmcb12->control.pause_filter_count =
- svm->vmcb->control.pause_filter_count;
- vmcb12->control.pause_filter_thresh =
- svm->vmcb->control.pause_filter_thresh;
-
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
svm_switch_vmcb(svm, &svm->vmcb01);
@@ -830,6 +848,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
+ if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
+ WARN_ON(!svm->tsc_scaling_enabled);
+ vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
+ svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
+ }
+
svm->nested.ctl.nested_cr3 = 0;
/*
@@ -1217,6 +1241,16 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
return NESTED_EXIT_CONTINUE;
}
+void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ vcpu->arch.tsc_scaling_ratio =
+ kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
+ svm->tsc_ratio_msr);
+ svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
+}
+
static int svm_get_nested_state(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
u32 user_data_size)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 75e0b21ad07c..1964b9a174be 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -17,10 +17,10 @@
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
-#include <asm/fpu/internal.h>
#include <asm/pkru.h>
#include <asm/trapnr.h>
+#include <asm/fpu/xcr.h>
#include "x86.h"
#include "svm.h"
@@ -595,43 +595,55 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
return 0;
}
-static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ int *error)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_update_vmsa vmsa;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int ret;
+
+ /* Perform some pre-encryption checks against the VMSA */
+ ret = sev_es_sync_vmsa(svm);
+ if (ret)
+ return ret;
+
+ /*
+ * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
+ * the VMSA memory content (i.e it will write the same memory region
+ * with the guest's key), so invalidate it first.
+ */
+ clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+ vmsa.reserved = 0;
+ vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
+ vmsa.address = __sme_pa(svm->vmsa);
+ vmsa.len = PAGE_SIZE;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+ if (ret)
+ return ret;
+
+ vcpu->arch.guest_state_protected = true;
+ return 0;
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
struct kvm_vcpu *vcpu;
int i, ret;
if (!sev_es_guest(kvm))
return -ENOTTY;
- vmsa.reserved = 0;
-
kvm_for_each_vcpu(i, vcpu, kvm) {
- struct vcpu_svm *svm = to_svm(vcpu);
-
- /* Perform some pre-encryption checks against the VMSA */
- ret = sev_es_sync_vmsa(svm);
+ ret = mutex_lock_killable(&vcpu->mutex);
if (ret)
return ret;
- /*
- * The LAUNCH_UPDATE_VMSA command will perform in-place
- * encryption of the VMSA memory content (i.e it will write
- * the same memory region with the guest's key), so invalidate
- * it first.
- */
- clflush_cache_range(svm->vmsa, PAGE_SIZE);
+ ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
- vmsa.handle = sev->handle;
- vmsa.address = __sme_pa(svm->vmsa);
- vmsa.len = PAGE_SIZE;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
- &argp->error);
+ mutex_unlock(&vcpu->mutex);
if (ret)
return ret;
-
- svm->vcpu.arch.guest_state_protected = true;
}
return 0;
@@ -1397,8 +1409,10 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start.handle);
goto e_free_session;
+ }
params.handle = start.handle;
if (copy_to_user((void __user *)(uintptr_t)argp->data,
@@ -1464,12 +1478,19 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 0);
+ PAGE_SIZE, &n, 1);
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
}
+ /*
+ * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
+ * encrypts the written data with the guest's key, and the cache may
+ * contain dirty, unencrypted data.
+ */
+ sev_clflush_pages(guest_page, n);
+
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
@@ -1501,6 +1522,20 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
+static bool cmd_allowed_from_miror(u32 cmd_id)
+{
+ /*
+ * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
+ * active mirror VMs. Also allow the debugging and status commands.
+ */
+ if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
+ cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
+ cmd_id == KVM_SEV_DBG_ENCRYPT)
+ return true;
+
+ return false;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1517,8 +1552,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
mutex_lock(&kvm->lock);
- /* enc_context_owner handles all memory enc operations */
- if (is_mirroring_enc_context(kvm)) {
+ /* Only the enc_context_owner handles some memory enc operations. */
+ if (is_mirroring_enc_context(kvm) &&
+ !cmd_allowed_from_miror(sev_cmd.id)) {
r = -EINVAL;
goto out;
}
@@ -1715,8 +1751,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
struct file *source_kvm_file;
struct kvm *source_kvm;
- struct kvm_sev_info *mirror_sev;
- unsigned int asid;
+ struct kvm_sev_info source_sev, *mirror_sev;
int ret;
source_kvm_file = fget(source_fd);
@@ -1739,7 +1774,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
goto e_source_unlock;
}
- asid = to_kvm_svm(source_kvm)->sev_info.asid;
+ memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
+ sizeof(source_sev));
/*
* The mirror kvm holds an enc_context_owner ref so its asid can't
@@ -1759,8 +1795,16 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
/* Set enc_context_owner and copy its encryption context over */
mirror_sev = &to_kvm_svm(kvm)->sev_info;
mirror_sev->enc_context_owner = source_kvm;
- mirror_sev->asid = asid;
mirror_sev->active = true;
+ mirror_sev->asid = source_sev.asid;
+ mirror_sev->fd = source_sev.fd;
+ mirror_sev->es_active = source_sev.es_active;
+ mirror_sev->handle = source_sev.handle;
+ /*
+ * Do not copy ap_jump_table. Since the mirror does not share the same
+ * KVM contexts as the original, and they may have different
+ * memory-views.
+ */
mutex_unlock(&kvm->lock);
return 0;
@@ -2547,11 +2591,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
- if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+ int count;
+ int bytes;
+
+ if (svm->vmcb->control.exit_info_2 > INT_MAX)
+ return -EINVAL;
+
+ count = svm->vmcb->control.exit_info_2;
+ if (unlikely(check_mul_overflow(count, size, &bytes)))
+ return -EINVAL;
+
+ if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
- return kvm_sev_es_string_io(&svm->vcpu, size, port,
- svm->ghcb_sa, svm->ghcb_sa_len, in);
+ return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
@@ -2599,11 +2652,11 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
-void sev_es_create_vcpu(struct vcpu_svm *svm)
+void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
/*
- * Set the GHCB MSR value as per the GHCB specification when creating
- * a vCPU for an SEV-ES guest.
+ * Set the GHCB MSR value as per the GHCB specification when emulating
+ * vCPU RESET for an SEV-ES guest.
*/
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
GHCB_VERSION_MIN,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 05e8d4d27969..b36ca4e476c2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -25,6 +25,7 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/rwsem.h>
+#include <linux/cc_platform.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -36,6 +37,7 @@
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
#include <asm/traps.h>
+#include <asm/fpu/api.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -186,6 +188,13 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
+/* enable/disable LBR virtualization */
+static int lbrv = true;
+module_param(lbrv, int, 0444);
+
+static int tsc_scaling = true;
+module_param(tsc_scaling, int, 0444);
+
/*
* enable / disable AVIC. Because the defaults differ for APICv
* support between VMX and SVM we cannot use module_param_named.
@@ -455,7 +464,7 @@ static int has_svm(void)
return 0;
}
- if (sev_active()) {
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
pr_info("KVM is unsupported when running as an SEV guest\n");
return 0;
}
@@ -466,7 +475,7 @@ static int has_svm(void)
static void svm_hardware_disable(void)
{
/* Make sure we clean up behind us */
- if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
+ if (tsc_scaling)
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
cpu_svm_disable();
@@ -509,6 +518,10 @@ static int svm_hardware_enable(void)
wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ /*
+ * Set the default value, even if we don't use TSC scaling
+ * to avoid having stale value in the msr
+ */
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
__this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
}
@@ -929,6 +942,9 @@ static __init void svm_set_cpu_caps(void)
if (npt_enabled)
kvm_cpu_cap_set(X86_FEATURE_NPT);
+ if (tsc_scaling)
+ kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
+
/* Nested VM can receive #VMEXIT instead of triggering #GP */
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
}
@@ -976,10 +992,15 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
kvm_enable_efer_bits(EFER_FFXSR);
- if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 32;
+ if (tsc_scaling) {
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ tsc_scaling = false;
+ } else {
+ pr_info("TSC scaling supported\n");
+ kvm_has_tsc_control = true;
+ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 32;
+ }
}
tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
@@ -1059,6 +1080,13 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
+
svm_set_cpu_caps();
/*
@@ -1109,7 +1137,9 @@ static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
{
- return kvm_default_tsc_scaling_ratio;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return svm->tsc_ratio_msr;
}
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1121,7 +1151,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
{
wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
}
@@ -1150,6 +1180,38 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
}
}
+static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (guest_cpuid_is_intel(vcpu)) {
+ /*
+ * We must intercept SYSENTER_EIP and SYSENTER_ESP
+ * accesses because the processor only stores 32 bits.
+ * For the same reason we cannot use virtual VMLOAD/VMSAVE.
+ */
+ svm_set_intercept(svm, INTERCEPT_VMLOAD);
+ svm_set_intercept(svm, INTERCEPT_VMSAVE);
+ svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
+ } else {
+ /*
+ * If hardware supports Virtual VMLOAD VMSAVE then enable it
+ * in VMCB and clear intercepts to avoid #VMEXIT.
+ */
+ if (vls) {
+ svm_clr_intercept(svm, INTERCEPT_VMLOAD);
+ svm_clr_intercept(svm, INTERCEPT_VMSAVE);
+ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ }
+ /* No need to intercept these MSRs */
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
+ }
+}
+
static void init_vmcb(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1296,11 +1358,25 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
}
svm_hv_init_vmcb(svm->vmcb);
+ init_vmcb_after_set_cpuid(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
enable_gif(svm);
+}
+
+static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
+ svm_init_osvw(vcpu);
+ vcpu->arch.microcode_version = 0x01000065;
+ svm->tsc_ratio_msr = kvm_default_tsc_scaling_ratio;
+
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_vcpu_reset(svm);
}
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -1311,6 +1387,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
svm->virt_spec_ctrl = 0;
init_vmcb(vcpu);
+
+ if (!init_event)
+ __svm_vcpu_reset(vcpu);
}
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
@@ -1346,10 +1425,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
/*
* SEV-ES guests maintain an encrypted version of their FPU
* state which is restored and saved on VMRUN and VMEXIT.
- * Free the fpu structure to prevent KVM from attempting to
- * access the FPU state.
+ * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't
+ * do xsave/xrstor on it.
*/
- kvm_free_guest_fpu(vcpu);
+ fpstate_set_confidential(&vcpu->arch.guest_fpu);
}
err = avic_init_vcpu(svm);
@@ -1370,24 +1449,13 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->vmcb01.ptr = page_address(vmcb01_page);
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
+ svm_switch_vmcb(svm, &svm->vmcb01);
if (vmsa_page)
svm->vmsa = page_address(vmsa_page);
svm->guest_state_loaded = false;
- svm_switch_vmcb(svm, &svm->vmcb01);
- init_vmcb(vcpu);
-
- svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
- svm_init_osvw(vcpu);
- vcpu->arch.microcode_version = 0x01000065;
-
- if (sev_es_guest(vcpu->kvm))
- /* Perform SEV-ES specific VMCB creation updates */
- sev_es_create_vcpu(svm);
-
return 0;
error_free_vmsa_page:
@@ -1447,7 +1515,7 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
vmsave(__sme_page_pa(sd->save_area));
}
- if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ if (tsc_scaling) {
u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
__this_cpu_write(current_tsc_ratio, tsc_ratio);
@@ -1566,6 +1634,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
V_IRQ_INJECTION_BITS_MASK;
+
+ svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
}
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@@ -2222,6 +2292,10 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (error_code)
goto reinject;
+ /* All SVM instructions expect page aligned RAX */
+ if (svm->vmcb->save.rax & ~PAGE_MASK)
+ goto reinject;
+
/* Decode the instruction for usage later */
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject;
@@ -2651,6 +2725,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
struct vcpu_svm *svm = to_svm(vcpu);
switch (msr_info->index) {
+ case MSR_AMD64_TSC_RATIO:
+ if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
+ return 1;
+ msr_info->data = svm->tsc_ratio_msr;
+ break;
case MSR_STAR:
msr_info->data = svm->vmcb01.ptr->save.star;
break;
@@ -2800,6 +2879,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+ case MSR_AMD64_TSC_RATIO:
+ if (!msr->host_initiated && !svm->tsc_scaling_enabled)
+ return 1;
+
+ if (data & TSC_RATIO_RSVD)
+ return 1;
+
+ svm->tsc_ratio_msr = data;
+
+ if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
+ nested_svm_update_tsc_ratio_msr(vcpu);
+
+ break;
case MSR_IA32_CR_PAT:
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
@@ -2912,7 +3004,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->tsc_aux = data;
break;
case MSR_IA32_DEBUGCTLMSR:
- if (!boot_cpu_has(X86_FEATURE_LBRV)) {
+ if (!lbrv) {
vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
__func__, data);
break;
@@ -3272,11 +3364,13 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
return svm_exit_handlers[exit_code](vcpu);
}
-static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2,
u32 *intr_info, u32 *error_code)
{
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+ *reason = control->exit_code;
*info1 = control->exit_info_1;
*info2 = control->exit_info_2;
*intr_info = control->exit_int_info;
@@ -3293,7 +3387,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
- trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
+ trace_kvm_exit(vcpu, KVM_ISA_SVM);
/* SEV-ES guests must use the CR write traps to track CR registers. */
if (!sev_es_guest(vcpu->kvm)) {
@@ -3306,7 +3400,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (is_guest_mode(vcpu)) {
int vmexit;
- trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM);
+ trace_kvm_nested_vmexit(vcpu, KVM_ISA_SVM);
vmexit = nested_svm_exit_special(svm);
@@ -3774,8 +3868,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
pre_svm_run(vcpu);
- WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
-
sync_lapic_to_cr8(vcpu);
if (unlikely(svm->asid != svm->vmcb->control.asid)) {
@@ -3995,6 +4087,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
+ svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
+
svm_recalc_instruction_intercepts(vcpu, svm);
/* For sev guests, the memory encryption bit is not reserved in CR3. */
@@ -4021,33 +4115,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_request_apicv_update(vcpu->kvm, false,
APICV_INHIBIT_REASON_NESTED);
}
-
- if (guest_cpuid_is_intel(vcpu)) {
- /*
- * We must intercept SYSENTER_EIP and SYSENTER_ESP
- * accesses because the processor only stores 32 bits.
- * For the same reason we cannot use virtual VMLOAD/VMSAVE.
- */
- svm_set_intercept(svm, INTERCEPT_VMLOAD);
- svm_set_intercept(svm, INTERCEPT_VMSAVE);
- svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
-
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
- } else {
- /*
- * If hardware supports Virtual VMLOAD VMSAVE then enable it
- * in VMCB and clear intercepts to avoid #VMEXIT.
- */
- if (vls) {
- svm_clr_intercept(svm, INTERCEPT_VMLOAD);
- svm_clr_intercept(svm, INTERCEPT_VMSAVE);
- svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
- }
- /* No need to intercept these MSRs */
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
- }
+ init_vmcb_after_set_cpuid(vcpu);
}
static bool svm_has_wbinvd_exit(void)
@@ -4285,43 +4353,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
struct kvm_host_map map_save;
int ret;
- if (is_guest_mode(vcpu)) {
- /* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1);
- /* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ if (!is_guest_mode(vcpu))
+ return 0;
- svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
- svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
- svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+ /* FED8h - SVM Guest */
+ put_smstate(u64, smstate, 0x7ed8, 1);
+ /* FEE0h - SVM Guest VMCB Physical Address */
+ put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
- ret = nested_svm_vmexit(svm);
- if (ret)
- return ret;
+ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
- /*
- * KVM uses VMCB01 to store L1 host state while L2 runs but
- * VMCB01 is going to be used during SMM and thus the state will
- * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
- * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
- * format of the area is identical to guest save area offsetted
- * by 0x400 (matches the offset of 'struct vmcb_save_area'
- * within 'struct vmcb'). Note: HSAVE area may also be used by
- * L1 hypervisor to save additional host context (e.g. KVM does
- * that, see svm_prepare_guest_switch()) which must be
- * preserved.
- */
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
- &map_save) == -EINVAL)
- return 1;
+ ret = nested_svm_vmexit(svm);
+ if (ret)
+ return ret;
- BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+ /*
+ * KVM uses VMCB01 to store L1 host state while L2 runs but
+ * VMCB01 is going to be used during SMM and thus the state will
+ * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+ * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+ * format of the area is identical to guest save area offsetted
+ * by 0x400 (matches the offset of 'struct vmcb_save_area'
+ * within 'struct vmcb'). Note: HSAVE area may also be used by
+ * L1 hypervisor to save additional host context (e.g. KVM does
+ * that, see svm_prepare_guest_switch()) which must be
+ * preserved.
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
- svm_copy_vmrun_state(map_save.hva + 0x400,
- &svm->vmcb01.ptr->save);
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
- kvm_vcpu_unmap(vcpu, &map_save, true);
- }
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
return 0;
}
@@ -4329,50 +4398,54 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
- int ret = 0;
+ u64 saved_efer, vmcb12_gpa;
+ struct vmcb *vmcb12;
+ int ret;
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
- u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
- u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
- u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
- struct vmcb *vmcb12;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return 0;
- if (guest) {
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
- return 1;
+ /* Non-zero if SMI arrived while vCPU was in guest mode. */
+ if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+ return 0;
- if (!(saved_efer & EFER_SVME))
- return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+ return 1;
- if (kvm_vcpu_map(vcpu,
- gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
- return 1;
+ saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
+ if (!(saved_efer & EFER_SVME))
+ return 1;
- if (svm_allocate_nested(svm))
- return 1;
+ vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+ return 1;
- vmcb12 = map.hva;
+ ret = 1;
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+ goto unmap_map;
- nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ if (svm_allocate_nested(svm))
+ goto unmap_save;
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
- kvm_vcpu_unmap(vcpu, &map, true);
+ /*
+ * Restore L1 host state from L1 HSAVE area as VMCB01 was
+ * used during SMM (see svm_enter_smm())
+ */
- /*
- * Restore L1 host state from L1 HSAVE area as VMCB01 was
- * used during SMM (see svm_enter_smm())
- */
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
- &map_save) == -EINVAL)
- return 1;
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400);
- svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
- map_save.hva + 0x400);
+ /*
+ * Enter the nested guest now
+ */
- kvm_vcpu_unmap(vcpu, &map_save, true);
- }
- }
+ vmcb12 = map.hva;
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+unmap_save:
+ kvm_vcpu_unmap(vcpu, &map_save, true);
+unmap_map:
+ kvm_vcpu_unmap(vcpu, &map, true);
return ret;
}
@@ -4509,6 +4582,8 @@ static int svm_vm_init(struct kvm *kvm)
}
static struct kvm_x86_ops svm_x86_ops __initdata = {
+ .name = "kvm_amd",
+
.hardware_unsetup = svm_hardware_teardown,
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 524d943f3efc..5e9510d4574e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -140,6 +140,8 @@ struct vcpu_svm {
u64 next_rip;
u64 spec_ctrl;
+
+ u64 tsc_ratio_msr;
/*
* Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
* translated into the appropriate L2_CFG bits on the host to
@@ -160,7 +162,8 @@ struct vcpu_svm {
unsigned long int3_rip;
/* cached guest cpuid flags for faster access */
- bool nrips_enabled : 1;
+ bool nrips_enabled : 1;
+ bool tsc_scaling_enabled : 1;
u32 ldr_reg;
u32 dfr_reg;
@@ -191,7 +194,7 @@ struct vcpu_svm {
/* SEV-ES scratch area support */
void *ghcb_sa;
- u64 ghcb_sa_len;
+ u32 ghcb_sa_len;
bool ghcb_sa_sync;
bool ghcb_sa_free;
@@ -218,12 +221,12 @@ DECLARE_PER_CPU(struct svm_cpu_data *, svm_data);
void recalc_intercepts(struct vcpu_svm *svm);
-static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
+static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
return container_of(kvm, struct kvm_svm, kvm);
}
-static inline bool sev_guest(struct kvm *kvm)
+static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -234,7 +237,7 @@ static inline bool sev_guest(struct kvm *kvm)
#endif
}
-static inline bool sev_es_guest(struct kvm *kvm)
+static __always_inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -271,7 +274,7 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
}
-static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
@@ -459,7 +462,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
}
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
+ u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
@@ -482,6 +486,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
struct vmcb_control_area *control);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
@@ -561,7 +567,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu);
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
void sev_es_init_vmcb(struct vcpu_svm *svm);
-void sev_es_create_vcpu(struct vcpu_svm *svm);
+void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 22e2b019de37..9430d6437c9f 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -56,12 +56,12 @@ static inline void invlpga(unsigned long addr, u32 asid)
* VMSAVE, VMLOAD, etc... is still controlled by the effective address size,
* hence 'unsigned long' instead of 'hpa_t'.
*/
-static inline void vmsave(unsigned long pa)
+static __always_inline void vmsave(unsigned long pa)
{
svm_asm1(vmsave, "a" (pa), "memory");
}
-static inline void vmload(unsigned long pa)
+static __always_inline void vmload(unsigned long pa)
{
svm_asm1(vmload, "a" (pa), "memory");
}