summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/boot/compressed/kaslr.c3
-rw-r--r--arch/x86/boot/compressed/misc.c6
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/include/asm/extable.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h3
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/kernel/cpu/cyrix.c1
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c16
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c3
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/process.c11
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c4
-rw-r--r--arch/x86/kernel/tboot.c2
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/cpuid.c20
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/mmu.c7
-rw-r--r--arch/x86/kvm/mmu.h1
-rw-r--r--arch/x86/kvm/svm.c26
-rw-r--r--arch/x86/kvm/vmx.c149
-rw-r--r--arch/x86/kvm/x86.c72
-rw-r--r--arch/x86/mm/extable.c3
-rw-r--r--arch/x86/mm/hugetlbpage.c2
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/init_64.c8
-rw-r--r--arch/x86/mm/pat.c9
-rw-r--r--arch/x86/platform/efi/efi.c6
-rw-r--r--arch/x86/platform/efi/efi_64.c79
-rw-r--r--arch/x86/platform/efi/quirks.c3
36 files changed, 261 insertions, 211 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4ccfacc7232a..0efb4c9497bc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2776,10 +2776,6 @@ config COMPAT_FOR_U64_ALIGNMENT
config SYSVIPC_COMPAT
def_bool y
depends on SYSVIPC
-
-config KEYS_COMPAT
- def_bool y
- depends on KEYS
endif
endmenu
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 54c24f0a43d3..56a7e9201741 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -564,9 +564,6 @@ void choose_random_location(unsigned long input,
{
unsigned long random_addr, min_addr;
- /* By default, keep output position unchanged. */
- *virt_addr = *output;
-
if (cmdline_find_option_bool("nokaslr")) {
warn("KASLR disabled: 'nokaslr' on cmdline.");
return;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b3c5a5f030ce..00241c815524 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned long output_len)
{
const unsigned long kernel_total_size = VO__end - VO__text;
- unsigned long virt_addr = (unsigned long)output;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
+ if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ error("Destination virtual address is beyond the kernel mapping area");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
error("Destination address too large");
@@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Destination address does not match LOAD_PHYSICAL_ADDR");
- if ((unsigned long)output != virt_addr)
+ if (virt_addr != LOAD_PHYSICAL_ADDR)
error("Destination virtual address changed when not relocatable");
#endif
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 1c8355eadbd1..766a5211f827 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input,
unsigned long output_size,
unsigned long *virt_addr)
{
- /* No change from existing output location. */
- *virt_addr = *output;
}
#endif
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..110ce8238466 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 758c1aa5009d..44ec523287f6 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[pkg];
- if (!box && atomic_inc_return(&box->refcnt) == 1)
+ if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index b8ad261d11dc..c66d19e3c23e 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,6 +29,7 @@ struct pt_regs;
} while (0)
extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba100713924..d5acc27ed1cc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
#include <asm/hyperv.h>
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..a28b671f1549 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index a70fd61095f8..6f077445647a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
break;
case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+ case 11: /* GX1 with inverted Device ID */
#ifdef CONFIG_PCI
{
u32 vendor, device;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f5af0cc7eb0d..9257bd9dc664 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
dentry = kernfs_mount(fs_type, flags, rdt_root,
RDTGROUP_SUPER_MAGIC, NULL);
if (IS_ERR(dentry))
- goto out_cdp;
+ goto out_destroy;
static_branch_enable(&rdt_enable_key);
goto out;
+out_destroy:
+ kernfs_remove(kn_info);
out_cdp:
cdp_disable();
out:
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 45db4d2ebd01..e9f4d762aa5b 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
}
static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
{
@@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
- desc.data, desc.size);
+ ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret != UCODE_OK)
return -EINVAL;
@@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
}
static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
{
enum ucode_state ret;
@@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
#ifdef CONFIG_X86_32
/* save BSP's matching patch for early load */
- if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
- struct ucode_patch *p = find_patch(cpu);
+ if (save) {
+ struct ucode_patch *p = find_patch(0);
if (p) {
memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;
/* reload ucode container only on the boot cpu */
- if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+ if (!refresh_fw || !bsp)
return UCODE_OK;
if (c->x86 >= 0x15)
@@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+ ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index afdfd237b59f..f522415bf9e5 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -619,6 +619,9 @@ int __init save_microcode_in_initrd_intel(void)
show_saved_mc();
+ /* initrd is going away, clear patch ptr. */
+ intel_ucode_patch = NULL;
+
return 0;
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index da5c09789984..43e10d6fdbed 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -161,8 +161,8 @@ void kvm_async_pf_task_wait(u32 token)
*/
rcu_irq_exit();
native_safe_halt();
- rcu_irq_enter();
local_irq_disable();
+ rcu_irq_enter();
}
}
if (!n.halted)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb88428cbf2..3ca198080ea9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
}
/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct inactive_task_frame *frame =
- (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
- return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
* because the task might wake up and we might look at a stack
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ff40e74c9181..ffeae818aa7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
- smp_processor_id());
+ raw_smp_processor_id());
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 207b8f2582c7..213ddf3e937d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (end - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 4b1724059909..a4eb27918ceb 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,7 +514,7 @@ int tboot_force_iommu(void)
if (!tboot_enabled())
return 0;
- if (!intel_iommu_tboot_noforce)
+ if (intel_iommu_tboot_noforce)
return 1;
if (no_iommu || swiotlb || dmar_disabled)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3995d3a777d4..bf54309b85da 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -182,7 +182,7 @@ int is_valid_bugaddr(unsigned long addr)
return ud == INSN_UD0 || ud == INSN_UD2;
}
-static int fixup_bug(struct pt_regs *regs, int trapnr)
+int fixup_bug(struct pt_regs *regs, int trapnr)
{
if (trapnr != X86_TRAP_UD)
return 0;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a181ae76c71c..59ca2eea522c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -780,18 +780,20 @@ out:
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
- int j, nent = vcpu->arch.cpuid_nent;
+ struct kvm_cpuid_entry2 *ej;
+ int j = i;
+ int nent = vcpu->arch.cpuid_nent;
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
- for (j = i + 1; ; j = (j + 1) % nent) {
- struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
- if (ej->function == e->function) {
- ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
- return j;
- }
- }
- return 0; /* silence gcc, even though control never reaches here */
+ do {
+ j = (j + 1) % nent;
+ ej = &vcpu->arch.cpuid_entries[j];
+ } while (ej->function != e->function);
+
+ ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+
+ return j;
}
/* find an entry with matching function, matching index (if needed), and that
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c329d2894905..d24c8742d9b0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
static void cancel_hv_timer(struct kvm_lapic *apic)
{
+ preempt_disable();
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
+ preempt_enable();
}
static bool start_hv_timer(struct kvm_lapic *apic)
@@ -1934,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
for (i = 0; i < KVM_APIC_LVT_NUM; i++)
kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
+ if (kvm_vcpu_is_reset_bsp(vcpu) &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
kvm_lapic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5d3376f67794..cb8225969255 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3698,12 +3698,15 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
-static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
kvm_event_needs_reinjection(vcpu)))
return false;
+ if (is_guest_mode(vcpu))
+ return false;
+
return kvm_x86_ops->interrupt_allowed(vcpu);
}
@@ -3719,7 +3722,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
if (!async)
return false; /* *pfn has correct page already */
- if (!prefault && can_do_async_pf(vcpu)) {
+ if (!prefault && kvm_can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(gva, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 27975807cc64..330bf3a811fb 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -76,6 +76,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty);
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 183ddb235fb4..ba9891ac5c56 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1807,7 +1807,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
* AMD's VMCB does not have an explicit unusable field, so emulate it
* for cross vendor migration purposes by "not present"
*/
- var->unusable = !var->present || (var->type == 0);
+ var->unusable = !var->present;
switch (seg) {
case VCPU_SREG_TR:
@@ -1840,6 +1840,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
*/
if (var->unusable)
var->db = 0;
+ /* This is symmetric with svm_set_segment() */
var->dpl = to_svm(vcpu)->vmcb->save.cpl;
break;
}
@@ -1980,18 +1981,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
s->base = var->base;
s->limit = var->limit;
s->selector = var->selector;
- if (var->unusable)
- s->attrib = 0;
- else {
- s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
- s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
- s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
- s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
- s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
- s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
- s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
- s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
- }
+ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
+ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
+ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
+ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
+ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
+ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
+ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
+ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
/*
* This is always accurate, except if SYSRET returned to a segment
@@ -2000,7 +1997,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
* would entail passing the CPL to userspace and back.
*/
if (seg == VCPU_SREG_SS)
- svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
+ /* This is symmetric with svm_get_segment() */
+ svm->vmcb->save.cpl = (var->dpl & 3);
mark_dirty(svm->vmcb, VMCB_SEG);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 72f78396bc09..ca5d2b93385c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2425,7 +2425,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
if (!(vmcs12->exception_bitmap & (1u << nr)))
return 0;
- nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
vmcs_read32(VM_EXIT_INTR_INFO),
vmcs_readl(EXIT_QUALIFICATION));
return 1;
@@ -6914,97 +6914,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * This function performs the various checks including
- * - if it's 4KB aligned
- * - No bits beyond the physical address width are set
- * - Returns 0 on success or else 1
- * (Intel SDM Section 30.3)
- */
-static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
- gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
{
gva_t gva;
- gpa_t vmptr;
struct x86_exception e;
- struct page *page;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int maxphyaddr = cpuid_maxphyaddr(vcpu);
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
- sizeof(vmptr), &e)) {
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
+ sizeof(*vmpointer), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
- switch (exit_reason) {
- case EXIT_REASON_VMON:
- /*
- * SDM 3: 24.11.5
- * The first 4 bytes of VMXON region contain the supported
- * VMCS revision identifier
- *
- * Note - IA32_VMX_BASIC[48] will never be 1
- * for the nested case;
- * which replaces physical address width with 32
- *
- */
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
- if (*(u32 *)kmap(page) != VMCS12_REVISION) {
- kunmap(page);
- nested_release_page_clean(page);
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
- kunmap(page);
- nested_release_page_clean(page);
- vmx->nested.vmxon_ptr = vmptr;
- break;
- case EXIT_REASON_VMCLEAR:
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
- break;
- case EXIT_REASON_VMPTRLD:
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMPTRLD_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMPTRLD_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
- break;
- default:
- return 1; /* shouldn't happen */
- }
-
- if (vmpointer)
- *vmpointer = vmptr;
return 0;
}
@@ -7066,6 +6990,8 @@ out_msr_bitmap:
static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
+ gpa_t vmptr;
+ struct page *page;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7095,9 +7021,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
-
+
+ /*
+ * SDM 3: 24.11.5
+ * The first 4 bytes of VMXON region contain the supported
+ * VMCS revision identifier
+ *
+ * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
+ * which replaces physical address width with 32
+ */
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ page = nested_get_page(vcpu, vmptr);
+ if (page == NULL) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+ kunmap(page);
+ nested_release_page_clean(page);
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ kunmap(page);
+ nested_release_page_clean(page);
+
+ vmx->nested.vmxon_ptr = vmptr;
ret = enter_vmx_operation(vcpu);
if (ret)
return ret;
@@ -7213,9 +7167,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
@@ -7545,9 +7509,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
@@ -7913,11 +7887,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
int cr = exit_qualification & 15;
- int reg = (exit_qualification >> 8) & 15;
- unsigned long val = kvm_register_readl(vcpu, reg);
+ int reg;
+ unsigned long val;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
+ reg = (exit_qualification >> 8) & 15;
+ val = kvm_register_readl(vcpu, reg);
switch (cr) {
case 0:
if (vmcs12->cr0_guest_host_mask &
@@ -7972,6 +7948,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
* lmsw can change bits 1..3 of cr0, and only set bit 0 of
* cr0. Other attempted changes are ignored, with no exit.
*/
+ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
if (vmcs12->cr0_guest_host_mask & 0xe &
(val ^ vmcs12->cr0_read_shadow))
return true;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02363e37d4a6..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
int r = EMULATE_DONE;
kvm_x86_ops->skip_emulated_instruction(vcpu);
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+ /*
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
+ *
+ * This is correct even for TF set by the guest, because "the
+ * processor will not generate this exception after the instruction
+ * that sets the TF flag".
+ */
+ if (unlikely(rflags & X86_EFLAGS_TF))
+ kvm_vcpu_do_singlestep(vcpu, &r);
return r == EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);
@@ -8394,10 +8396,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (vcpu->arch.pv.pv_unhalted)
return true;
- if (atomic_read(&vcpu->arch.nmi_queued))
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+ (vcpu->arch.nmi_pending &&
+ kvm_x86_ops->nmi_allowed(vcpu)))
return true;
- if (kvm_test_request(KVM_REQ_SMI, vcpu))
+ if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
+ (vcpu->arch.smi_pending && !is_smm(vcpu)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8604,8 +8609,7 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
return true;
else
- return !kvm_event_needs_reinjection(vcpu) &&
- kvm_x86_ops->interrupt_allowed(vcpu);
+ return kvm_can_do_async_pf(vcpu);
}
void kvm_arch_start_assignment(struct kvm *kvm)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 35ea061010a1..0ea8afcb929c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -162,6 +162,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
if (fixup_exception(regs, trapnr))
return;
+ if (fixup_bug(regs, trapnr))
+ return;
+
fail:
early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
(unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 302f43fd9c28..adad702b39cd 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cbc87ea98751..9b3f9fa5b283 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,16 +161,16 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
-#if !defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
* use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
+ if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK))
page_size_mask |= 1 << PG_LEVEL_2M;
-#endif
+ else
+ direct_gbpages = 0;
/* Enable PSE if available */
if (boot_cpu_has(X86_FEATURE_PSE))
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 95651dc58e09..0a59daf799f8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -990,7 +990,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
pud_base = pud_offset(p4d, 0);
remove_pud_table(pud_base, addr, next, direct);
- free_pud_table(pud_base, p4d);
+ /*
+ * For 4-level page tables we do not want to free PUDs, but in the
+ * 5-level case we should free them. This code will have to change
+ * to adapt for boot-time switching between 4 and 5 level page tables.
+ */
+ if (CONFIG_PGTABLE_LEVELS == 5)
+ free_pud_table(pud_base, p4d);
}
if (direct)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83a59a67757a..9b78685b66e6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -65,11 +65,9 @@ static int __init nopat(char *str)
}
early_param("nopat", nopat);
-static bool __read_mostly __pat_initialized = false;
-
bool pat_enabled(void)
{
- return __pat_initialized;
+ return !!__pat_enabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@@ -227,14 +225,13 @@ static void pat_bsp_init(u64 pat)
}
wrmsrl(MSR_IA32_CR_PAT, pat);
- __pat_initialized = true;
__init_cache_modes(pat);
}
static void pat_ap_init(u64 pat)
{
- if (!this_cpu_has(X86_FEATURE_PAT)) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
* If this happens we are on a secondary CPU, but switched to
* PAT on the boot CPU. We have no way to undo PAT.
@@ -309,7 +306,7 @@ void pat_init(void)
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
- if (!__pat_enabled) {
+ if (!pat_enabled()) {
init_cache_modes();
return;
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 7e76a4d8304b..43b96f5f78ba 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void)
/*
* We don't do virtual mode, since we don't do runtime services, on
- * non-native EFI
+ * non-native EFI. With efi=old_map, we don't do runtime services in
+ * kexec kernel because in the initial boot something else might
+ * have been mapped at these virtual addresses.
*/
- if (!efi_is_native()) {
+ if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
efi_memmap_unmap();
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c488625c9712..eb8dff15a7f6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable)
pgd_t * __init efi_call_phys_prolog(void)
{
- unsigned long vaddress;
- pgd_t *save_pgd;
+ unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+ pgd_t *save_pgd, *pgd_k, *pgd_efi;
+ p4d_t *p4d, *p4d_k, *p4d_efi;
+ pud_t *pud;
int pgd;
- int n_pgds;
+ int n_pgds, i, j;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
save_pgd = (pgd_t *)read_cr3();
@@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void)
n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+ /*
+ * Build 1:1 identity mapping for efi=old_map usage. Note that
+ * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+ * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
+ * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
+ * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
+ * This means here we can only reuse the PMD tables of the direct mapping.
+ */
for (pgd = 0; pgd < n_pgds; pgd++) {
- save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
- vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
- set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+ addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+ vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+ pgd_efi = pgd_offset_k(addr_pgd);
+ save_pgd[pgd] = *pgd_efi;
+
+ p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+ if (!p4d) {
+ pr_err("Failed to allocate p4d table!\n");
+ goto out;
+ }
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ addr_p4d = addr_pgd + i * P4D_SIZE;
+ p4d_efi = p4d + p4d_index(addr_p4d);
+
+ pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+ if (!pud) {
+ pr_err("Failed to allocate pud table!\n");
+ goto out;
+ }
+
+ for (j = 0; j < PTRS_PER_PUD; j++) {
+ addr_pud = addr_p4d + j * PUD_SIZE;
+
+ if (addr_pud > (max_pfn << PAGE_SHIFT))
+ break;
+
+ vaddr = (unsigned long)__va(addr_pud);
+
+ pgd_k = pgd_offset_k(vaddr);
+ p4d_k = p4d_offset(pgd_k, vaddr);
+ pud[j] = *pud_offset(p4d_k, vaddr);
+ }
+ }
}
out:
__flush_tlb_all();
@@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
/*
* After the lock is released, the original page table is restored.
*/
- int pgd_idx;
+ int pgd_idx, i;
int nr_pgds;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
write_cr3((unsigned long)save_pgd);
@@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
- for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+ for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+ pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+ continue;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ p4d = p4d_offset(pgd,
+ pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+ if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+ continue;
+
+ pud = (pud_t *)p4d_page_vaddr(*p4d);
+ pud_free(&init_mm, pud);
+ }
+
+ p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ p4d_free(&init_mm, p4d);
+ }
+
kfree(save_pgd);
__flush_tlb_all();
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 26615991d69c..e0cf95a83f3f 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -360,6 +360,9 @@ void __init efi_free_boot_services(void)
free_bootmem_late(start, size);
}
+ if (!num_entries)
+ return;
+
new_size = efi.memmap.desc_size * num_entries;
new_phys = efi_memmap_alloc(num_entries);
if (!new_phys) {