summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/svm.c26
-rw-r--r--arch/x86/kvm/vmx.c147
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/platform/efi/efi.c6
-rw-r--r--arch/x86/platform/efi/efi_64.c79
-rw-r--r--arch/x86/platform/efi/quirks.c3
7 files changed, 161 insertions, 112 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c329d2894905..d24c8742d9b0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
static void cancel_hv_timer(struct kvm_lapic *apic)
{
+ preempt_disable();
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
+ preempt_enable();
}
static bool start_hv_timer(struct kvm_lapic *apic)
@@ -1934,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
for (i = 0; i < KVM_APIC_LVT_NUM; i++)
kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
+ if (kvm_vcpu_is_reset_bsp(vcpu) &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
kvm_lapic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 183ddb235fb4..ba9891ac5c56 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1807,7 +1807,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
* AMD's VMCB does not have an explicit unusable field, so emulate it
* for cross vendor migration purposes by "not present"
*/
- var->unusable = !var->present || (var->type == 0);
+ var->unusable = !var->present;
switch (seg) {
case VCPU_SREG_TR:
@@ -1840,6 +1840,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
*/
if (var->unusable)
var->db = 0;
+ /* This is symmetric with svm_set_segment() */
var->dpl = to_svm(vcpu)->vmcb->save.cpl;
break;
}
@@ -1980,18 +1981,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
s->base = var->base;
s->limit = var->limit;
s->selector = var->selector;
- if (var->unusable)
- s->attrib = 0;
- else {
- s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
- s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
- s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
- s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
- s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
- s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
- s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
- s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
- }
+ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
+ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
+ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
+ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
+ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
+ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
+ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
+ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
/*
* This is always accurate, except if SYSRET returned to a segment
@@ -2000,7 +1997,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
* would entail passing the CPL to userspace and back.
*/
if (seg == VCPU_SREG_SS)
- svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
+ /* This is symmetric with svm_get_segment() */
+ svm->vmcb->save.cpl = (var->dpl & 3);
mark_dirty(svm->vmcb, VMCB_SEG);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 72f78396bc09..9b4b5d6dcd34 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6914,97 +6914,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * This function performs the various checks including
- * - if it's 4KB aligned
- * - No bits beyond the physical address width are set
- * - Returns 0 on success or else 1
- * (Intel SDM Section 30.3)
- */
-static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
- gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
{
gva_t gva;
- gpa_t vmptr;
struct x86_exception e;
- struct page *page;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int maxphyaddr = cpuid_maxphyaddr(vcpu);
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
- sizeof(vmptr), &e)) {
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
+ sizeof(*vmpointer), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
- switch (exit_reason) {
- case EXIT_REASON_VMON:
- /*
- * SDM 3: 24.11.5
- * The first 4 bytes of VMXON region contain the supported
- * VMCS revision identifier
- *
- * Note - IA32_VMX_BASIC[48] will never be 1
- * for the nested case;
- * which replaces physical address width with 32
- *
- */
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
- if (*(u32 *)kmap(page) != VMCS12_REVISION) {
- kunmap(page);
- nested_release_page_clean(page);
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
- kunmap(page);
- nested_release_page_clean(page);
- vmx->nested.vmxon_ptr = vmptr;
- break;
- case EXIT_REASON_VMCLEAR:
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
- break;
- case EXIT_REASON_VMPTRLD:
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMPTRLD_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMPTRLD_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
- break;
- default:
- return 1; /* shouldn't happen */
- }
-
- if (vmpointer)
- *vmpointer = vmptr;
return 0;
}
@@ -7066,6 +6990,8 @@ out_msr_bitmap:
static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
+ gpa_t vmptr;
+ struct page *page;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7095,9 +7021,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
-
+
+ /*
+ * SDM 3: 24.11.5
+ * The first 4 bytes of VMXON region contain the supported
+ * VMCS revision identifier
+ *
+ * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
+ * which replaces physical address width with 32
+ */
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ page = nested_get_page(vcpu, vmptr);
+ if (page == NULL) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+ kunmap(page);
+ nested_release_page_clean(page);
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ kunmap(page);
+ nested_release_page_clean(page);
+
+ vmx->nested.vmxon_ptr = vmptr;
ret = enter_vmx_operation(vcpu);
if (ret)
return ret;
@@ -7213,9 +7167,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
@@ -7545,9 +7509,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
@@ -7913,11 +7887,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
int cr = exit_qualification & 15;
- int reg = (exit_qualification >> 8) & 15;
- unsigned long val = kvm_register_readl(vcpu, reg);
+ int reg;
+ unsigned long val;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
+ reg = (exit_qualification >> 8) & 15;
+ val = kvm_register_readl(vcpu, reg);
switch (cr) {
case 0:
if (vmcs12->cr0_guest_host_mask &
@@ -7972,6 +7948,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
* lmsw can change bits 1..3 of cr0, and only set bit 0 of
* cr0. Other attempted changes are ignored, with no exit.
*/
+ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
if (vmcs12->cr0_guest_host_mask & 0xe &
(val ^ vmcs12->cr0_read_shadow))
return true;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02363e37d4a6..a2cd0997343c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8394,10 +8394,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (vcpu->arch.pv.pv_unhalted)
return true;
- if (atomic_read(&vcpu->arch.nmi_queued))
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+ (vcpu->arch.nmi_pending &&
+ kvm_x86_ops->nmi_allowed(vcpu)))
return true;
- if (kvm_test_request(KVM_REQ_SMI, vcpu))
+ if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
+ (vcpu->arch.smi_pending && !is_smm(vcpu)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 7e76a4d8304b..43b96f5f78ba 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void)
/*
* We don't do virtual mode, since we don't do runtime services, on
- * non-native EFI
+ * non-native EFI. With efi=old_map, we don't do runtime services in
+ * kexec kernel because in the initial boot something else might
+ * have been mapped at these virtual addresses.
*/
- if (!efi_is_native()) {
+ if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
efi_memmap_unmap();
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c488625c9712..eb8dff15a7f6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable)
pgd_t * __init efi_call_phys_prolog(void)
{
- unsigned long vaddress;
- pgd_t *save_pgd;
+ unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+ pgd_t *save_pgd, *pgd_k, *pgd_efi;
+ p4d_t *p4d, *p4d_k, *p4d_efi;
+ pud_t *pud;
int pgd;
- int n_pgds;
+ int n_pgds, i, j;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
save_pgd = (pgd_t *)read_cr3();
@@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void)
n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+ /*
+ * Build 1:1 identity mapping for efi=old_map usage. Note that
+ * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+ * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
+ * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
+ * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
+ * This means here we can only reuse the PMD tables of the direct mapping.
+ */
for (pgd = 0; pgd < n_pgds; pgd++) {
- save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
- vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
- set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+ addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+ vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+ pgd_efi = pgd_offset_k(addr_pgd);
+ save_pgd[pgd] = *pgd_efi;
+
+ p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+ if (!p4d) {
+ pr_err("Failed to allocate p4d table!\n");
+ goto out;
+ }
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ addr_p4d = addr_pgd + i * P4D_SIZE;
+ p4d_efi = p4d + p4d_index(addr_p4d);
+
+ pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+ if (!pud) {
+ pr_err("Failed to allocate pud table!\n");
+ goto out;
+ }
+
+ for (j = 0; j < PTRS_PER_PUD; j++) {
+ addr_pud = addr_p4d + j * PUD_SIZE;
+
+ if (addr_pud > (max_pfn << PAGE_SHIFT))
+ break;
+
+ vaddr = (unsigned long)__va(addr_pud);
+
+ pgd_k = pgd_offset_k(vaddr);
+ p4d_k = p4d_offset(pgd_k, vaddr);
+ pud[j] = *pud_offset(p4d_k, vaddr);
+ }
+ }
}
out:
__flush_tlb_all();
@@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
/*
* After the lock is released, the original page table is restored.
*/
- int pgd_idx;
+ int pgd_idx, i;
int nr_pgds;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
write_cr3((unsigned long)save_pgd);
@@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
- for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+ for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+ pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+ continue;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ p4d = p4d_offset(pgd,
+ pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+ if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+ continue;
+
+ pud = (pud_t *)p4d_page_vaddr(*p4d);
+ pud_free(&init_mm, pud);
+ }
+
+ p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ p4d_free(&init_mm, p4d);
+ }
+
kfree(save_pgd);
__flush_tlb_all();
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 26615991d69c..e0cf95a83f3f 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -360,6 +360,9 @@ void __init efi_free_boot_services(void)
free_bootmem_late(start, size);
}
+ if (!num_entries)
+ return;
+
new_size = efi.memmap.desc_size * num_entries;
new_phys = efi_memmap_alloc(num_entries);
if (!new_phys) {