diff options
Diffstat (limited to 'arch/x86/mm/init_32.c')
| -rw-r--r-- | arch/x86/mm/init_32.c | 273 |
1 files changed, 52 insertions, 221 deletions
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a64a6f2848d..8a34fff6ab2b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 1995 Linus Torvalds @@ -23,7 +24,6 @@ #include <linux/pci.h> #include <linux/pfn.h> #include <linux/poison.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/proc_fs.h> #include <linux/memory_hotplug.h> @@ -35,7 +35,6 @@ #include <asm/bios_ebda.h> #include <asm/processor.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/dma.h> #include <asm/fixmap.h> #include <asm/e820/api.h> @@ -46,11 +45,13 @@ #include <asm/olpc_ofw.h> #include <asm/pgalloc.h> #include <asm/sections.h> -#include <asm/paravirt.h> #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/page_types.h> +#include <asm/cpu_entry_area.h> #include <asm/init.h> +#include <asm/pgtable_areas.h> +#include <asm/numa.h> #include "mm_internal.h" @@ -72,7 +73,6 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { pmd_table = (pmd_t *)alloc_low_page(); - paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); @@ -97,7 +97,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = (pte_t *)alloc_low_page(); - paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } @@ -179,12 +178,10 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, set_pte(newpte + i, pte[i]); *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); - paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); BUG_ON(newpte != pte_offset_kernel(pmd, 0)); __flush_tlb_all(); - paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); pte = newpte; } BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) @@ -236,7 +233,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -static inline int is_kernel_text(unsigned long addr) +static inline int is_x86_32_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; @@ -251,7 +248,8 @@ static inline int is_kernel_text(unsigned long addr) unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, + pgprot_t prot) { int use_pse = page_size_mask == (1<<PG_LEVEL_2M); unsigned long last_map_addr = end; @@ -326,8 +324,8 @@ repeat: addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (is_kernel_text(addr) || - is_kernel_text(addr2)) + if (is_x86_32_kernel_text(addr) || + is_x86_32_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -352,7 +350,7 @@ repeat: */ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); - if (is_kernel_text(addr)) + if (is_x86_32_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; pages_4k++; @@ -387,64 +385,14 @@ repeat: return last_map_addr; } -pte_t *kmap_pte; - -static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) -{ - pgd_t *pgd = pgd_offset_k(vaddr); - p4d_t *p4d = p4d_offset(pgd, vaddr); - pud_t *pud = pud_offset(p4d, vaddr); - pmd_t *pmd = pmd_offset(pud, vaddr); - return pte_offset_kernel(pmd, vaddr); -} - -static void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* - * Cache the first kmap pte: - */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); -} - #ifdef CONFIG_HIGHMEM static void __init permanent_kmaps_init(pgd_t *pgd_base) { - unsigned long vaddr; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + unsigned long vaddr = PKMAP_BASE; - vaddr = PKMAP_BASE; page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - pgd = swapper_pg_dir + pgd_index(vaddr); - p4d = p4d_offset(pgd, vaddr); - pud = pud_offset(p4d, vaddr); - pmd = pmd_offset(pud, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; -} - -void __init add_highpages_with_active_regions(int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - phys_addr_t start, end; - u64 i; - - for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { - unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), - start_pfn, end_pfn); - unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), - start_pfn, end_pfn); - for ( ; pfn < e_pfn; pfn++) - if (pfn_valid(pfn)) - free_highmem_page(pfn_to_page(pfn)); - } + pkmap_page_table = virt_to_kpte(vaddr); } #else static inline void permanent_kmaps_init(pgd_t *pgd_base) @@ -452,6 +400,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} + void __init native_pagetable_init(void) { unsigned long pfn, va; @@ -483,7 +446,7 @@ void __init native_pagetable_init(void) break; /* should not be large page here */ - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n", pfn, pmd, __pa(pmd)); BUG_ON(1); @@ -497,7 +460,6 @@ void __init native_pagetable_init(void) pfn, pmd, __pa(pmd), pte, __pa(pte)); pte_clear(NULL, va, pte); } - paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); paging_init(); } @@ -506,15 +468,8 @@ void __init native_pagetable_init(void) * point, we've been running on some set of pagetables constructed by * the boot process. * - * If we're booting on native hardware, this will be a pagetable - * constructed in arch/x86/kernel/head_32.S. The root of the - * pagetable will be swapper_pg_dir. - * - * If we're booting paravirtualized under a hypervisor, then there are - * more options: we may already be running PAE, and the pagetable may - * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_init() will set up swapper_pg_dir - * appropriately for the rest of the initialization to work. + * This will be a pagetable constructed in arch/x86/kernel/head_32.S. + * The root of the pagetable will be swapper_pg_dir. * * In general, pagetable_init() assumes that the pagetable may already * be partially populated, and so it avoids stomping on any existing @@ -542,8 +497,14 @@ static void __init pagetable_init(void) permanent_kmaps_init(pgd_base); } -pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); +#define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) +/* Bits supported by the hardware: */ +pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK; +/* Bits allowed in normal kernel mappings: */ +pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; EXPORT_SYMBOL_GPL(__supported_pte_mask); +/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ +EXPORT_SYMBOL(__default_kernel_pte_mask); /* user-defined highmem size */ static unsigned int highmem_pages = -1; @@ -604,7 +565,7 @@ static void __init lowmem_pfn_init(void) "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" #define MSG_HIGHMEM_TRIMMED \ - "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n" + "Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n" /* * We have more RAM than fits into lowmem - we try to put it into * highmem, also taking the highmem=x boot parameter into account: @@ -628,18 +589,13 @@ static void __init highmem_pfn_init(void) #ifndef CONFIG_HIGHMEM /* Maximum memory usable is what is directly addressable */ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); max_pfn = MAXMEM_PFN; #else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_HIGHMEM64G if (max_pfn > MAX_NONPAE_PFN) { max_pfn = MAX_NONPAE_PFN; printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); } -#endif /* !CONFIG_HIGHMEM64G */ #endif /* !CONFIG_HIGHMEM */ } @@ -656,7 +612,6 @@ void __init find_low_pfn_range(void) highmem_pfn_init(); } -#ifndef CONFIG_NEED_MULTIPLE_NODES void __init initmem_init(void) { #ifdef CONFIG_HIGHMEM @@ -670,23 +625,13 @@ void __init initmem_init(void) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); - sparse_memory_present_with_active_regions(0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); -#ifdef CONFIG_FLATMEM - max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn; -#endif __vmalloc_start_set = true; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); - setup_bootmem_allocator(); -} -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ - -void __init setup_bootmem_allocator(void) -{ printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped<<PAGE_SHIFT); printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); @@ -705,13 +650,10 @@ void __init paging_init(void) __flush_tlb_all(); - kmap_init(); - /* * NOTE: at this point the bootmem allocator is fully available. */ olpc_dt_build_devicetree(); - sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); zone_sizes_init(); } @@ -730,7 +672,7 @@ static void __init test_wp_bit(void) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); - if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { + if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { clear_fixmap(FIX_WP_TEST); printk(KERN_CONT "Ok.\n"); return; @@ -740,63 +682,19 @@ static void __init test_wp_bit(void) panic("Linux doesn't support CPUs with broken WP."); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { pci_iommu_alloc(); #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - /* - * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to - * be done before free_all_bootmem(). Memblock use free low memory for - * temporary data (see find_range_array()) and for this purpose can use - * pages that was already passed to the buddy allocator, hence marked as - * not accessible in the page tables when compiled with - * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not - * important here. - */ - set_highmem_pages_init(); - - /* this will put all low memory onto the freelists */ - free_all_bootmem(); +} +void __init mem_init(void) +{ after_bootmem = 1; - - mem_init_print_info(NULL); - printk(KERN_INFO "virtual kernel memory layout:\n" - " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#endif - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" - " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", - FIXADDR_START, FIXADDR_TOP, - (FIXADDR_TOP - FIXADDR_START) >> 10, - -#ifdef CONFIG_HIGHMEM - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, - (LAST_PKMAP*PAGE_SIZE) >> 10, -#endif - - VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, - - (unsigned long)__va(0), (unsigned long)high_memory, - ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, - - (unsigned long)&__init_begin, (unsigned long)&__init_end, - ((unsigned long)&__init_end - - (unsigned long)&__init_begin) >> 10, - - (unsigned long)&_etext, (unsigned long)&_edata, - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, - - (unsigned long)&_text, (unsigned long)&_etext, - ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + x86_init.hyper.init_after_bootmem(); /* * Check boundaries twice: Some fundamental inconsistencies can @@ -822,58 +720,8 @@ void __init mem_init(void) test_wp_bit(); } -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - return __add_pages(nid, start_pfn, nr_pages, want_memblock); -} - -#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - - zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); -} -#endif -#endif - int kernel_set_to_readonly __read_mostly; -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, start+size); - - set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, start+size); - - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); -} - static void mark_nxdata_nx(void) { /* @@ -882,49 +730,32 @@ static void mark_nxdata_nx(void) */ unsigned long start = PFN_ALIGN(_etext); /* - * This comes from is_kernel_text upper limit. Also HPAGE where used: + * This comes from is_x86_32_kernel_text upper limit. Also HPAGE where used: */ unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; if (__supported_pte_mask & _PAGE_NX) printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); - set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); + set_memory_nx(start, size >> PAGE_SHIFT); } void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; + unsigned long size = (unsigned long)__end_rodata - start; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); - printk(KERN_INFO "Write protecting the kernel text: %luk\n", + pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG - printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", - start, start+size); - set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT); - - printk(KERN_INFO "Testing CPA: write protecting again\n"); - set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); -#endif - - start += size; - size = (unsigned long)__end_rodata - start; - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); - printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", - size >> 10); - -#ifdef CONFIG_CPA_DEBUG - printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size); + pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size); set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); - printk(KERN_INFO "Testing CPA: write protecting again\n"); + pr_info("Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif mark_nxdata_nx(); - if (__supported_pte_mask & _PAGE_NX) - debug_checkwx(); } |
