summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx/nested.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r--arch/x86/kvm/vmx/nested.c215
1 files changed, 188 insertions, 27 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b8ea1969113d..76271962cb70 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -721,6 +721,24 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_MPERF, MSR_TYPE_R);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_U_CET, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_S_CET, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL0_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL1_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL2_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL3_SSP, MSR_TYPE_RW);
+
kvm_vcpu_unmap(vcpu, &map);
vmx->nested.force_msr_bitmap_recalc = false;
@@ -997,7 +1015,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
__func__, i, e.index, e.reserved);
goto fail;
}
- if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) {
+ if (kvm_emulate_msr_write(vcpu, e.index, e.value)) {
pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, e.value);
@@ -1033,7 +1051,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
}
}
- if (kvm_get_msr_with_filter(vcpu, msr_index, data)) {
+ if (kvm_emulate_msr_read(vcpu, msr_index, data)) {
pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
msr_index);
return false;
@@ -1272,9 +1290,10 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT |
VMX_BASIC_INOUT |
- VMX_BASIC_TRUE_CTLS;
+ VMX_BASIC_TRUE_CTLS |
+ VMX_BASIC_NO_HW_ERROR_CODE_CC;
- const u64 reserved_bits = GENMASK_ULL(63, 56) |
+ const u64 reserved_bits = GENMASK_ULL(63, 57) |
GENMASK_ULL(47, 45) |
BIT_ULL(31);
@@ -2520,6 +2539,32 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
}
}
+static void vmcs_read_cet_state(struct kvm_vcpu *vcpu, u64 *s_cet,
+ u64 *ssp, u64 *ssp_tbl)
+{
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ *s_cet = vmcs_readl(GUEST_S_CET);
+
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) {
+ *ssp = vmcs_readl(GUEST_SSP);
+ *ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
+ }
+}
+
+static void vmcs_write_cet_state(struct kvm_vcpu *vcpu, u64 s_cet,
+ u64 ssp, u64 ssp_tbl)
+{
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ vmcs_writel(GUEST_S_CET, s_cet);
+
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) {
+ vmcs_writel(GUEST_SSP, ssp);
+ vmcs_writel(GUEST_INTR_SSP_TABLE, ssp_tbl);
+ }
+}
+
static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
struct hv_enlightened_vmcs *hv_evmcs = nested_vmx_evmcs(vmx);
@@ -2636,6 +2681,10 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)
+ vmcs_write_cet_state(&vmx->vcpu, vmcs12->guest_s_cet,
+ vmcs12->guest_ssp, vmcs12->guest_ssp_tbl);
+
set_cr4_guest_host_mask(vmx);
}
@@ -2675,6 +2724,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
}
+
+ if (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
+ vmcs_write_cet_state(vcpu, vmx->nested.pre_vmenter_s_cet,
+ vmx->nested.pre_vmenter_ssp,
+ vmx->nested.pre_vmenter_ssp_tbl);
+
if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
@@ -2770,8 +2826,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
- WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->guest_ia32_perf_global_ctrl))) {
+ WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -2949,7 +3005,6 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
- bool should_have_error_code;
bool urg = nested_cpu_has2(vmcs12,
SECONDARY_EXEC_UNRESTRICTED_GUEST);
bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
@@ -2966,12 +3021,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
return -EINVAL;
- /* VM-entry interruption-info field: deliver error code */
- should_have_error_code =
- intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
- x86_exception_has_error_code(vector);
- if (CC(has_error_code != should_have_error_code))
- return -EINVAL;
+ /*
+ * Cannot deliver error code in real mode or if the interrupt
+ * type is not hardware exception. For other cases, do the
+ * consistency check only if the vCPU doesn't enumerate
+ * VMX_BASIC_NO_HW_ERROR_CODE_CC.
+ */
+ if (!prot_mode || intr_type != INTR_TYPE_HARD_EXCEPTION) {
+ if (CC(has_error_code))
+ return -EINVAL;
+ } else if (!nested_cpu_has_no_hw_errcode_cc(vcpu)) {
+ if (CC(has_error_code != x86_exception_has_error_code(vector)))
+ return -EINVAL;
+ }
/* VM-entry exception error code */
if (CC(has_error_code &&
@@ -3038,6 +3100,16 @@ static bool is_l1_noncanonical_address_on_vmexit(u64 la, struct vmcs12 *vmcs12)
return !__is_canonical_address(la, l1_address_bits_on_exit);
}
+static int nested_vmx_check_cet_state_common(struct kvm_vcpu *vcpu, u64 s_cet,
+ u64 ssp, u64 ssp_tbl)
+{
+ if (CC(!kvm_is_valid_u_s_cet(vcpu, s_cet)) || CC(!IS_ALIGNED(ssp, 4)) ||
+ CC(is_noncanonical_msr_address(ssp_tbl, vcpu)))
+ return -EINVAL;
+
+ return 0;
+}
+
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -3048,6 +3120,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;
+ if (CC(vmcs12->host_cr4 & X86_CR4_CET && !(vmcs12->host_cr0 & X86_CR0_WP)))
+ return -EINVAL;
+
if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
return -EINVAL;
@@ -3104,6 +3179,27 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE) {
+ if (nested_vmx_check_cet_state_common(vcpu, vmcs12->host_s_cet,
+ vmcs12->host_ssp,
+ vmcs12->host_ssp_tbl))
+ return -EINVAL;
+
+ /*
+ * IA32_S_CET and SSP must be canonical if the host will
+ * enter 64-bit mode after VM-exit; otherwise, higher
+ * 32-bits must be all 0s.
+ */
+ if (ia32e) {
+ if (CC(is_noncanonical_msr_address(vmcs12->host_s_cet, vcpu)) ||
+ CC(is_noncanonical_msr_address(vmcs12->host_ssp, vcpu)))
+ return -EINVAL;
+ } else {
+ if (CC(vmcs12->host_s_cet >> 32) || CC(vmcs12->host_ssp >> 32))
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -3162,6 +3258,9 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
return -EINVAL;
+ if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
+ return -EINVAL;
+
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
(CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
@@ -3211,6 +3310,23 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
return -EINVAL;
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
+ if (nested_vmx_check_cet_state_common(vcpu, vmcs12->guest_s_cet,
+ vmcs12->guest_ssp,
+ vmcs12->guest_ssp_tbl))
+ return -EINVAL;
+
+ /*
+ * Guest SSP must have 63:N bits identical, rather than
+ * be canonical (i.e., 63:N-1 bits identical), where N is
+ * the CPU's maximum linear-address width. Similar to
+ * is_noncanonical_msr_address(), use the host's
+ * linear-address width.
+ */
+ if (CC(!__is_canonical_address(vmcs12->guest_ssp, max_host_virt_addr_bits() + 1)))
+ return -EINVAL;
+ }
+
if (nested_check_guest_non_reg_state(vmcs12))
return -EINVAL;
@@ -3544,6 +3660,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ if (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
+ vmcs_read_cet_state(vcpu, &vmx->nested.pre_vmenter_s_cet,
+ &vmx->nested.pre_vmenter_ssp,
+ &vmx->nested.pre_vmenter_ssp_tbl);
+
/*
* Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
* nested early checks are disabled. In the event of a "late" VM-Fail,
@@ -3690,7 +3812,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+ kvm_pmu_branch_retired(vcpu);
if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
return nested_vmx_failInvalid(vcpu);
@@ -4627,6 +4749,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
vmcs12->guest_ia32_efer = vcpu->arch.efer;
+
+ vmcs_read_cet_state(&vmx->vcpu, &vmcs12->guest_s_cet,
+ &vmcs12->guest_ssp,
+ &vmcs12->guest_ssp_tbl);
}
/*
@@ -4752,14 +4878,26 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
vmcs_write64(GUEST_BNDCFGS, 0);
+ /*
+ * Load CET state from host state if VM_EXIT_LOAD_CET_STATE is set.
+ * otherwise CET state should be retained across VM-exit, i.e.,
+ * guest values should be propagated from vmcs12 to vmcs01.
+ */
+ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE)
+ vmcs_write_cet_state(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp,
+ vmcs12->host_ssp_tbl);
+ else
+ vmcs_write_cet_state(vcpu, vmcs12->guest_s_cet, vmcs12->guest_ssp,
+ vmcs12->guest_ssp_tbl);
+
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
- WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->host_ia32_perf_global_ctrl));
+ WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl));
/* Set L1 segment info according to Intel SDM
27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -4937,7 +5075,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
goto vmabort;
}
- if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) {
+ if (kvm_emulate_msr_write(vcpu, h.index, h.value)) {
pr_debug_ratelimited(
"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
__func__, j, h.index, h.value);
@@ -6216,19 +6354,26 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
union vmx_exit_reason exit_reason)
{
- u32 msr_index = kvm_rcx_read(vcpu);
+ u32 msr_index;
gpa_t bitmap;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return true;
+ if (exit_reason.basic == EXIT_REASON_MSR_READ_IMM ||
+ exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+ msr_index = vmx_get_exit_qual(vcpu);
+ else
+ msr_index = kvm_rcx_read(vcpu);
+
/*
* The MSR_BITMAP page is divided into four 1024-byte bitmaps,
* for the four combinations of read/write and low/high MSR numbers.
* First we need to figure out which of the four to use:
*/
bitmap = vmcs12->msr_bitmap;
- if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
+ if (exit_reason.basic == EXIT_REASON_MSR_WRITE ||
+ exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
bitmap += 2048;
if (msr_index >= 0xc0000000) {
msr_index -= 0xc0000000;
@@ -6527,6 +6672,8 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
+ case EXIT_REASON_MSR_READ_IMM:
+ case EXIT_REASON_MSR_WRITE_IMM:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
case EXIT_REASON_INVALID_STATE:
return true;
@@ -6561,14 +6708,17 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
return true;
- case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+ case EXIT_REASON_XSAVES:
+ case EXIT_REASON_XRSTORS:
/*
- * This should never happen, since it is not possible to
- * set XSS to a non-zero value---neither in L1 nor in L2.
- * If if it were, XSS would have to be checked against
- * the XSS exit bitmap in vmcs12.
+ * Always forward XSAVES/XRSTORS to L1 as KVM doesn't utilize
+ * XSS-bitmap, and always loads vmcs02 with vmcs12's XSS-bitmap
+ * verbatim, i.e. any exit is due to L1's bitmap. WARN if
+ * XSAVES isn't enabled, as the CPU is supposed to inject #UD
+ * in that case, before consulting the XSS-bitmap.
*/
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
+ WARN_ON_ONCE(!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES));
+ return true;
case EXIT_REASON_UMWAIT:
case EXIT_REASON_TPAUSE:
return nested_cpu_has2(vmcs12,
@@ -7029,13 +7179,17 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
- VM_EXIT_CLEAR_BNDCFGS;
+ VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE;
msrs->exit_ctls_high |=
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT |
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ msrs->exit_ctls_high &= ~VM_EXIT_LOAD_CET_STATE;
+
/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
}
@@ -7051,11 +7205,16 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf,
#ifdef CONFIG_X86_64
VM_ENTRY_IA32E_MODE |
#endif
- VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
+ VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
+ VM_ENTRY_LOAD_CET_STATE;
msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER |
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ msrs->entry_ctls_high &= ~VM_ENTRY_LOAD_CET_STATE;
+
/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
}
@@ -7205,6 +7364,8 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs)
msrs->basic |= VMX_BASIC_TRUE_CTLS;
if (cpu_has_vmx_basic_inout())
msrs->basic |= VMX_BASIC_INOUT;
+ if (cpu_has_vmx_basic_no_hw_errcode_cc())
+ msrs->basic |= VMX_BASIC_NO_HW_ERROR_CODE_CC;
}
static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs)