summaryrefslogtreecommitdiff
path: root/arch/x86/boot
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 14:45:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 14:45:09 -0700
commit7a69f9c60b49699579f5bfb71f928cceba0afe1a (patch)
treebf3b5640bbd9f23beeb5a55d18348d65bafff8e8 /arch/x86/boot
parent9bc088ab66be8978fbc981ba9644468fa2c2fd3f (diff)
parent8781fb7e9749da424e01daacd14834b674658c63 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar: "The main changes in this cycle were: - Continued work to add support for 5-level paging provided by future Intel CPUs. In particular we switch the x86 GUP code to the generic implementation. (Kirill A. Shutemov) - Continued work to add PCID CPU support to native kernels as well. In this round most of the focus is on reworking/refreshing the TLB flush infrastructure for the upcoming PCID changes. (Andy Lutomirski)" * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits) x86/mm: Delete a big outdated comment about TLB flushing x86/mm: Don't reenter flush_tlb_func_common() x86/KASLR: Fix detection 32/64 bit bootloaders for 5-level paging x86/ftrace: Exclude functions in head64.c from function-tracing x86/mmap, ASLR: Do not treat unlimited-stack tasks as legacy mmap x86/mm: Remove reset_lazy_tlbstate() x86/ldt: Simplify the LDT switching logic x86/boot/64: Put __startup_64() into .head.text x86/mm: Add support for 5-level paging for KASLR x86/mm: Make kernel_physical_mapping_init() support 5-level paging x86/mm: Add sync_global_pgds() for configuration with 5-level paging x86/boot/64: Add support of additional page table level during early boot x86/boot/64: Rename init_level4_pgt and early_level4_pgt x86/boot/64: Rewrite startup_64() in C x86/boot/compressed: Enable 5-level paging during decompression stage x86/boot/efi: Define __KERNEL32_CS GDT on 64-bit configurations x86/boot/efi: Fix __KERNEL_CS definition of GDT entry on 64-bit configurations x86/boot/efi: Cleanup initialization of GDT entries x86/asm: Fix comment in return_from_SYSCALL_64() x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation ...
Diffstat (limited to 'arch/x86/boot')
-rw-r--r--arch/x86/boot/compressed/eboot.c73
-rw-r--r--arch/x86/boot/compressed/head_64.S86
-rw-r--r--arch/x86/boot/compressed/pagetable.c18
3 files changed, 148 insertions, 29 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cbf4b87f55b9..c3e869eaef0c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c,
memset((char *)gdt->address, 0x0, gdt->size);
desc = (struct desc_struct *)gdt->address;
- /* The first GDT is a dummy and the second is unused. */
- desc += 2;
+ /* The first GDT is a dummy. */
+ desc++;
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /* __KERNEL32_CS */
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+ } else {
+ /* Second entry is unused on 32-bit */
+ desc++;
+ }
+ /* __KERNEL_CS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
@@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c,
desc->p = 1;
desc->limit = 0xf;
desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ desc->l = 1;
+ desc->d = 0;
+ } else {
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ }
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
-
desc++;
+
+ /* __KERNEL_DS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
@@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c,
desc->d = SEG_OP_SIZE_32BIT;
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
-
-#ifdef CONFIG_X86_64
- /* Task segment value */
desc++;
- desc->limit0 = 0x0000;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_TSS;
- desc->s = 0;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit = 0x0;
- desc->avl = 0;
- desc->l = 0;
- desc->d = 0;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
-#endif /* CONFIG_X86_64 */
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /* Task segment value */
+ desc->limit0 = 0x0000;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_TSS;
+ desc->s = 0;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0x0;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = 0;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+ }
asm volatile("cli");
asm volatile ("lgdt %0" : : "m" (*gdt));
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d2ae1f821e0c..fbf4c32d0b62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -346,6 +346,48 @@ preferred_addr:
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
+#ifdef CONFIG_X86_5LEVEL
+ /* Check if 5-level paging has already enabled */
+ movq %cr4, %rax
+ testl $X86_CR4_LA57, %eax
+ jnz lvl5
+
+ /*
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we want to enable 5-level paging.
+ *
+ * The problem is that we cannot do it directly. Setting LA57 in
+ * long mode would trigger #GP. So we need to switch off long mode
+ * first.
+ *
+ * NOTE: This is not going to work if bootloader put us above 4G
+ * limit.
+ *
+ * The first step is go into compatibility mode.
+ */
+
+ /* Clear additional page table */
+ leaq lvl5_pgtable(%rbx), %rdi
+ xorq %rax, %rax
+ movq $(PAGE_SIZE/8), %rcx
+ rep stosq
+
+ /*
+ * Setup current CR3 as the first and only entry in a new top level
+ * page table.
+ */
+ movq %cr3, %rdi
+ leaq 0x7 (%rdi), %rax
+ movq %rax, lvl5_pgtable(%rbx)
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq compatible_mode(%rip), %rax
+ pushq %rax
+ lretq
+lvl5:
+#endif
+
/* Zero EFLAGS */
pushq $0
popfq
@@ -429,6 +471,44 @@ relocated:
jmp *%rax
.code32
+#ifdef CONFIG_X86_5LEVEL
+compatible_mode:
+ /* Setup data and stack segments */
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %ss
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Point CR3 to 5-level paging */
+ leal lvl5_pgtable(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable PAE and LA57 mode */
+ movl %cr4, %eax
+ orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
+ movl %eax, %cr4
+
+ /* Calculate address we are running at */
+ call 1f
+1: popl %edi
+ subl $1b, %edi
+
+ /* Prepare stack for far return to Long Mode */
+ pushl $__KERNEL_CS
+ leal lvl5(%edi), %eax
+ push %eax
+
+ /* Enable paging back */
+ movl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movl %eax, %cr0
+
+ lret
+#endif
+
no_longmode:
/* This isn't an x86-64 CPU so hang */
1:
@@ -442,7 +522,7 @@ gdt:
.word gdt_end - gdt
.long gdt
.word 0
- .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
@@ -486,3 +566,7 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
+#ifdef CONFIG_X86_5LEVEL
+lvl5_pgtable:
+ .fill PAGE_SIZE, 1, 0
+#endif
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 1d78f1739087..28029be47fbb 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context)
static struct alloc_pgt_data pgt_data;
/* The top level page table entry pointer. */
-static unsigned long level4p;
+static unsigned long top_level_pgt;
/*
* Mapping information structure passed to kernel_ident_mapping_init().
@@ -91,9 +91,15 @@ void initialize_identity_maps(void)
* If we came here via startup_32(), cr3 will be _pgtable already
* and we must append to the existing area instead of entirely
* overwriting it.
+ *
+ * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+ * the top-level page table is allocated separately.
+ *
+ * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+ * cases. On 4-level paging it's equal to 'top_level_pgt'.
*/
- level4p = read_cr3();
- if (level4p == (unsigned long)_pgtable) {
+ top_level_pgt = read_cr3_pa();
+ if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
debug_putstr("booted via startup_32()\n");
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
@@ -103,7 +109,7 @@ void initialize_identity_maps(void)
pgt_data.pgt_buf = _pgtable;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
- level4p = (unsigned long)alloc_pgt_page(&pgt_data);
+ top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
}
}
@@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size)
return;
/* Build the mapping. */
- kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
+ kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
start, end);
}
@@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size)
*/
void finalize_identity_maps(void)
{
- write_cr3(level4p);
+ write_cr3(top_level_pgt);
}