summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--arch/x86/kvm/svm/avic.c18
-rw-r--r--arch/x86/kvm/svm/nested.c91
-rw-r--r--arch/x86/kvm/svm/svm.c106
-rw-r--r--arch/x86/kvm/svm/svm.h24
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.c41
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h130
6 files changed, 336 insertions, 74 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 5e7e920113f3..1d01da64c333 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,10 +27,6 @@
#include "irq.h"
#include "svm.h"
-/* enable / disable AVIC */
-bool avic;
-module_param(avic, bool, S_IRUGO);
-
#define SVM_AVIC_DOORBELL 0xc001011b
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
@@ -124,7 +120,7 @@ void avic_vm_destroy(struct kvm *kvm)
unsigned long flags;
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
- if (!avic)
+ if (!enable_apicv)
return;
if (kvm_svm->avic_logical_id_table_page)
@@ -147,7 +143,7 @@ int avic_vm_init(struct kvm *kvm)
struct page *l_page;
u32 vm_id;
- if (!avic)
+ if (!enable_apicv)
return 0;
/* Allocating physical APIC ID table (4KB) */
@@ -240,7 +236,7 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
* APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
* memory region. So, we need to ensure that kvm->mm == current->mm.
*/
- if ((kvm->arch.apic_access_page_done == activate) ||
+ if ((kvm->arch.apic_access_memslot_enabled == activate) ||
(kvm->mm != current->mm))
goto out;
@@ -253,7 +249,7 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
goto out;
}
- kvm->arch.apic_access_page_done = activate;
+ kvm->arch.apic_access_memslot_enabled = activate;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -569,7 +565,7 @@ int avic_init_vcpu(struct vcpu_svm *svm)
int ret;
struct kvm_vcpu *vcpu = &svm->vcpu;
- if (!avic || !irqchip_in_kernel(vcpu->kvm))
+ if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
return 0;
ret = avic_init_backing_page(vcpu);
@@ -593,7 +589,7 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu)
void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
{
- if (!avic || !lapic_in_kernel(vcpu))
+ if (!enable_apicv || !lapic_in_kernel(vcpu))
return;
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -653,7 +649,7 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
bool activated = kvm_vcpu_apicv_active(vcpu);
- if (!avic)
+ if (!enable_apicv)
return;
if (activated) {
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 5e8d8443154e..21d03e3a5dfd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -98,13 +98,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
+
+ /*
+ * The NPT format depends on L1's CR4 and EFER, which is in vmcb01. Note,
+ * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
+ * vCPU state. CR0.WP is explicitly ignored, while CR0.PG is required.
+ */
kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
svm->vmcb01.ptr->save.efer,
svm->nested.ctl.nested_cr3);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
- reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -380,33 +385,47 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
+static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
+{
+ /*
+ * TODO: optimize unconditional TLB flush/MMU sync. A partial list of
+ * things to fix before this can be conditional:
+ *
+ * - Flush TLBs for both L1 and L2 remote TLB flush
+ * - Honor L1's request to flush an ASID on nested VMRUN
+ * - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
+ * - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
+ * - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
+ *
+ * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
+ * NPT guest-physical mappings on VMRUN.
+ */
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+}
+
/*
* Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
* if we are emulating VM-Entry into a guest with NPT enabled.
*/
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
- bool nested_npt)
+ bool nested_npt, bool reload_pdptrs)
{
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
return -EINVAL;
- if (!nested_npt && is_pae_paging(vcpu) &&
- (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
- if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
- return -EINVAL;
- }
+ if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
+ CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
+ return -EINVAL;
- /*
- * TODO: optimize unconditional TLB flush/MMU sync here and in
- * kvm_init_shadow_npt_mmu().
- */
if (!nested_npt)
- kvm_mmu_new_pgd(vcpu, cr3, false, false);
+ kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
- kvm_init_mmu(vcpu, false);
+ /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
+ kvm_init_mmu(vcpu);
return 0;
}
@@ -481,6 +500,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
@@ -505,10 +525,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
/* nested_cr3. */
if (nested_npt_enabled(svm))
- nested_svm_init_mmu_context(&svm->vcpu);
+ nested_svm_init_mmu_context(vcpu);
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
- svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
+ svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset =
+ vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
svm->vmcb->control.int_ctl =
(svm->nested.ctl.int_ctl & ~mask) |
@@ -523,8 +543,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count;
svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;
+ nested_svm_transition_tlb_flush(vcpu);
+
/* Enter Guest-Mode */
- enter_guest_mode(&svm->vcpu);
+ enter_guest_mode(vcpu);
/*
* Merge guest and host intercepts - must be called with vcpu in
@@ -576,7 +598,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
- nested_npt_enabled(svm));
+ nested_npt_enabled(svm), true);
if (ret)
return ret;
@@ -596,8 +618,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
u64 vmcb12_gpa;
- ++vcpu->stat.nested_run;
-
if (is_smm(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -803,9 +823,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_vcpu_unmap(vcpu, &map, true);
+ nested_svm_transition_tlb_flush(vcpu);
+
nested_svm_uninit_mmu_context(vcpu);
- rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false);
+ rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true);
if (rc)
return 1;
@@ -1228,8 +1250,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
&user_kvm_nested_state->data.svm[0];
struct vmcb_control_area *ctl;
struct vmcb_save_area *save;
+ unsigned long cr0;
int ret;
- u32 cr0;
BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
KVM_STATE_NESTED_SVM_VMCB_SIZE);
@@ -1302,6 +1324,19 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
goto out_free;
/*
+ * While the nested guest CR3 is already checked and set by
+ * KVM_SET_SREGS, it was set when nested state was yet loaded,
+ * thus MMU might not be initialized correctly.
+ * Set it again to fix this.
+ */
+
+ ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ nested_npt_enabled(svm), false);
+ if (WARN_ON_ONCE(ret))
+ goto out_free;
+
+
+ /*
* All checks done, we can enter guest mode. Userspace provides
* vmcb12.control, which will be combined with L1 and stored into
* vmcb02, and the L1 save state which we store in vmcb01.
@@ -1358,9 +1393,15 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
if (WARN_ON(!is_guest_mode(vcpu)))
return true;
- if (nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
- nested_npt_enabled(svm)))
- return false;
+ if (!vcpu->arch.pdptrs_from_userspace &&
+ !nested_npt_enabled(svm) && is_pae_paging(vcpu))
+ /*
+ * Reload the guest's PDPTRs since after a migration
+ * the guest CR3 might be restored prior to setting the nested
+ * state which can lead to a load of wrong PDPTRs.
+ */
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+ return false;
if (!nested_svm_vmrun_msrpm(svm)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e088086f3de6..8834822c00cd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -43,6 +43,9 @@
#include "svm.h"
#include "svm_ops.h"
+#include "kvm_onhyperv.h"
+#include "svm_onhyperv.h"
+
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
@@ -185,6 +188,13 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
+/*
+ * enable / disable AVIC. Because the defaults differ for APICv
+ * support between VMX and SVM we cannot use module_param_named.
+ */
+static bool avic;
+module_param(avic, bool, 0444);
+
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
@@ -673,6 +683,9 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
msrpm[offset] = tmp;
+
+ svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
+
}
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
@@ -939,6 +952,16 @@ static __init int svm_hardware_setup(void)
int r;
unsigned int order = get_order(IOPM_SIZE);
+ /*
+ * NX is required for shadow paging and for NPT if the NX huge pages
+ * mitigation is enabled.
+ */
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
+ pr_err_ratelimited("NX (Execute Disable) not supported\n");
+ return -EOPNOTSUPP;
+ }
+ kvm_enable_efer_bits(EFER_NX);
+
iopm_pages = alloc_pages(GFP_KERNEL, order);
if (!iopm_pages)
@@ -952,9 +975,6 @@ static __init int svm_hardware_setup(void)
supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
- if (boot_cpu_has(X86_FEATURE_NX))
- kvm_enable_efer_bits(EFER_NX);
-
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
kvm_enable_efer_bits(EFER_FFXSR);
@@ -996,6 +1016,8 @@ static __init int svm_hardware_setup(void)
/* Note, SEV setup consumes npt_enabled. */
sev_hardware_setup();
+ svm_hv_hardware_setup();
+
svm_adjust_mmio_mask();
for_each_possible_cpu(cpu) {
@@ -1009,14 +1031,12 @@ static __init int svm_hardware_setup(void)
nrips = false;
}
- if (avic) {
- if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) {
- avic = false;
- } else {
- pr_info("AVIC enabled\n");
+ enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- }
+ if (enable_apicv) {
+ pr_info("AVIC enabled\n");
+
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
}
if (vls) {
@@ -1080,26 +1100,30 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
-static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 g_tsc_offset = 0;
- if (is_guest_mode(vcpu)) {
- /* Write L1's TSC offset. */
- g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->vmcb01.ptr->control.tsc_offset;
- svm->vmcb01.ptr->control.tsc_offset = offset;
- }
+ return svm->nested.ctl.tsc_offset;
+}
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- svm->vmcb->control.tsc_offset - g_tsc_offset,
- offset);
+static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
+{
+ return kvm_default_tsc_scaling_ratio;
+}
- svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
+ svm->vmcb->control.tsc_offset = offset;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
- return svm->vmcb->control.tsc_offset;
+}
+
+static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+{
+ wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
}
/* Evaluate instruction intercepts that depend on guest CPUID features. */
@@ -1287,6 +1311,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
}
}
+ svm_hv_init_vmcb(svm->vmcb);
+
vmcb_mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -3106,6 +3132,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
return;
}
+ pr_err("VMCB %p, last attempted VMRUN on CPU %d\n",
+ svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu);
pr_err("VMCB Control Area:\n");
pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
@@ -3762,6 +3790,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
}
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ svm_hv_update_vp_id(svm->vmcb, vcpu);
+
/*
* Run with all-zero DR6 unless needed, so that we can get the exact cause
* of a #DB.
@@ -3835,6 +3865,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->next_rip = 0;
if (is_guest_mode(vcpu)) {
nested_sync_control_from_vmcb02(svm);
+
+ /* Track VMRUNs that have made past consistency checking */
+ if (svm->nested.nested_run_pending &&
+ svm->vmcb->control.exit_code != SVM_EXIT_ERR)
+ ++vcpu->stat.nested_run;
+
svm->nested.nested_run_pending = 0;
}
@@ -3846,10 +3882,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.apf.host_apf_flags =
kvm_read_and_reset_apf_flags();
- if (npt_enabled) {
- vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
- vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
- }
+ if (npt_enabled)
+ kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
/*
* We need to handle MC intercepts here before the vcpu has a chance to
@@ -3877,6 +3911,8 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
svm->vmcb->control.nested_cr3 = __sme_set(root_hpa);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
+ hv_track_root_tdp(vcpu, root_hpa);
+
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
return;
@@ -4249,7 +4285,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !svm_smi_blocked(vcpu);
}
-static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
@@ -4271,7 +4307,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map;
@@ -4427,13 +4463,12 @@ static int svm_vm_init(struct kvm *kvm)
if (!pause_filter_count || !pause_filter_thresh)
kvm->arch.pause_in_guest = true;
- if (avic) {
+ if (enable_apicv) {
int ret = avic_vm_init(kvm);
if (ret)
return ret;
}
- kvm_apicv_init(kvm, avic);
return 0;
}
@@ -4524,7 +4559,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
- .write_l1_tsc_offset = svm_write_l1_tsc_offset,
+ .get_l2_tsc_offset = svm_get_l2_tsc_offset,
+ .get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
+ .write_tsc_offset = svm_write_tsc_offset,
+ .write_tsc_multiplier = svm_write_tsc_multiplier,
.load_mmu_pgd = svm_load_mmu_pgd,
@@ -4544,8 +4582,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.setup_mce = svm_setup_mce,
.smi_allowed = svm_smi_allowed,
- .pre_enter_smm = svm_pre_enter_smm,
- .pre_leave_smm = svm_pre_leave_smm,
+ .enter_smm = svm_enter_smm,
+ .leave_smm = svm_leave_smm,
.enable_smi_window = svm_enable_smi_window,
.mem_enc_op = svm_mem_enc_op,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 2908c6ab5bb4..f89b623bb591 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -32,6 +32,11 @@
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+/*
+ * Clean bits in VMCB.
+ * VMCB_ALL_CLEAN_MASK might also need to
+ * be updated if this enum is modified.
+ */
enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
pause filter count */
@@ -49,9 +54,17 @@ enum {
* AVIC PHYSICAL_TABLE pointer,
* AVIC LOGICAL_TABLE pointer
*/
- VMCB_DIRTY_MAX,
+ VMCB_SW = 31, /* Reserved for hypervisor/software use */
};
+#define VMCB_ALL_CLEAN_MASK ( \
+ (1U << VMCB_INTERCEPTS) | (1U << VMCB_PERM_MAP) | \
+ (1U << VMCB_ASID) | (1U << VMCB_INTR) | \
+ (1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) | \
+ (1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) | \
+ (1U << VMCB_LBR) | (1U << VMCB_AVIC) | \
+ (1U << VMCB_SW))
+
/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
@@ -238,10 +251,15 @@ static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
{
- vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
+ vmcb->control.clean = VMCB_ALL_CLEAN_MASK
& ~VMCB_ALWAYS_DIRTY_MASK;
}
+static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
+{
+ return (vmcb->control.clean & (1 << bit));
+}
+
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
@@ -480,8 +498,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-extern bool avic;
-
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
new file mode 100644
index 000000000000..98aa981c04ec
--- /dev/null
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V for SVM.
+ */
+
+#include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
+
+#include <asm/mshyperv.h>
+
+#include "svm.h"
+#include "svm_ops.h"
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+#include "svm_onhyperv.h"
+
+int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+ struct hv_enlightenments *hve;
+ struct hv_partition_assist_pg **p_hv_pa_pg =
+ &to_kvm_hv(vcpu->kvm)->hv_pa_pg;
+
+ if (!*p_hv_pa_pg)
+ *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+ if (!*p_hv_pa_pg)
+ return -ENOMEM;
+
+ hve = (struct hv_enlightenments *)to_svm(vcpu)->vmcb->control.reserved_sw;
+
+ hve->partition_assist_page = __pa(*p_hv_pa_pg);
+ hve->hv_vm_id = (unsigned long)vcpu->kvm;
+ if (!hve->hv_enlightenments_control.nested_flush_hypercall) {
+ hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+ vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+ }
+
+ return 0;
+}
+
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
new file mode 100644
index 000000000000..9b9a55abc29f
--- /dev/null
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V for SVM.
+ */
+
+#ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
+#define __ARCH_X86_KVM_SVM_ONHYPERV_H__
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#include <asm/mshyperv.h>
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+
+static struct kvm_x86_ops svm_x86_ops;
+
+/*
+ * Hyper-V uses the software reserved 32 bytes in VMCB
+ * control area to expose SVM enlightenments to guests.
+ */
+struct hv_enlightenments {
+ struct __packed hv_enlightenments_control {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 enlightened_npt_tlb: 1;
+ u32 reserved:29;
+ } __packed hv_enlightenments_control;
+ u32 hv_vp_id;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW
+
+int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu);
+
+static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+{
+ struct hv_enlightenments *hve =
+ (struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+ if (npt_enabled &&
+ ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
+ hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+}
+
+static inline void svm_hv_hardware_setup(void)
+{
+ if (npt_enabled &&
+ ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
+ pr_info("kvm: Hyper-V enlightened NPT TLB flush enabled\n");
+ svm_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
+ svm_x86_ops.tlb_remote_flush_with_range =
+ hv_remote_flush_tlb_with_range;
+ }
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
+ int cpu;
+
+ pr_info("kvm: Hyper-V Direct TLB Flush enabled\n");
+ for_each_online_cpu(cpu) {
+ struct hv_vp_assist_page *vp_ap =
+ hv_get_vp_assist_page(cpu);
+
+ if (!vp_ap)
+ continue;
+
+ vp_ap->nested_control.features.directhypercall = 1;
+ }
+ svm_x86_ops.enable_direct_tlbflush =
+ svm_hv_enable_direct_tlbflush;
+ }
+}
+
+static inline void svm_hv_vmcb_dirty_nested_enlightenments(
+ struct kvm_vcpu *vcpu)
+{
+ struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+ struct hv_enlightenments *hve =
+ (struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+ /*
+ * vmcb can be NULL if called during early vcpu init.
+ * And its okay not to mark vmcb dirty during vcpu init
+ * as we mark it dirty unconditionally towards end of vcpu
+ * init phase.
+ */
+ if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+ hve->hv_enlightenments_control.msr_bitmap)
+ vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+}
+
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
+ struct kvm_vcpu *vcpu)
+{
+ struct hv_enlightenments *hve =
+ (struct hv_enlightenments *)vmcb->control.reserved_sw;
+ u32 vp_index = kvm_hv_get_vpindex(vcpu);
+
+ if (hve->hv_vp_id != vp_index) {
+ hve->hv_vp_id = vp_index;
+ vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+ }
+}
+#else
+
+static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+{
+}
+
+static inline void svm_hv_hardware_setup(void)
+{
+}
+
+static inline void svm_hv_vmcb_dirty_nested_enlightenments(
+ struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
+ struct kvm_vcpu *vcpu)
+{
+}
+#endif /* CONFIG_HYPERV */
+
+#endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */