Diffstat (limited to 'arch/x86/mm/init.c')
-rw-r--r--	arch/x86/mm/init.c	171
1 file changed, 97 insertions(+), 74 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 679893ea5e68..7456df985d96 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -7,6 +7,7 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 #include <linux/sched/task.h>
+#include <linux/execmem.h>
 
 #include <asm/set_memory.h>
 #include <asm/cpu_device_id.h>
@@ -27,6 +28,7 @@
 #include <asm/text-patching.h>
 #include <asm/memtype.h>
 #include <asm/paravirt.h>
+#include <asm/mmu_context.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -172,11 +174,7 @@ __ref void *alloc_low_pages(unsigned int num)
  * randomization is enabled.
  */
 
-#ifndef CONFIG_X86_5LEVEL
-#define INIT_PGD_PAGE_TABLES    3
-#else
 #define INIT_PGD_PAGE_TABLES    4
-#endif
 
 #ifndef CONFIG_RANDOMIZE_MEMORY
 #define INIT_PGD_PAGE_COUNT      (2 * INIT_PGD_PAGE_TABLES)
@@ -261,33 +259,34 @@ static void __init probe_page_size_mask(void)
 	}
 }
 
-#define INTEL_MATCH(_model) { .vendor  = X86_VENDOR_INTEL,	\
-			      .family  = 6,			\
-			      .model   = _model,		\
-			    }
 /*
- * INVLPG may not properly flush Global entries
- * on these CPUs when PCIDs are enabled.
+ * INVLPG may not properly flush Global entries on
+ * these CPUs. New microcode fixes the issue.
  */
 static const struct x86_cpu_id invlpg_miss_ids[] = {
-	INTEL_MATCH(INTEL_FAM6_ALDERLAKE   ),
-	INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
-	INTEL_MATCH(INTEL_FAM6_ATOM_GRACEMONT ),
-	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE  ),
-	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
-	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
+	X86_MATCH_VFM(INTEL_ALDERLAKE,      0x2e),
+	X86_MATCH_VFM(INTEL_ALDERLAKE_L,    0x42c),
+	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE,     0x118),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_P,   0x4117),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_S,   0x2e),
 	{}
 };
 
 static void setup_pcid(void)
 {
+	const struct x86_cpu_id *invlpg_miss_match;
+
 	if (!IS_ENABLED(CONFIG_X86_64))
 		return;
 
 	if (!boot_cpu_has(X86_FEATURE_PCID))
 		return;
 
-	if (x86_match_cpu(invlpg_miss_ids)) {
+	invlpg_miss_match = x86_match_cpu(invlpg_miss_ids);
+
+	if (invlpg_miss_match &&
+	    boot_cpu_data.microcode < invlpg_miss_match->driver_data) {
 		pr_info("Incomplete global flushes, disabling PCID");
 		setup_clear_cpu_cap(X86_FEATURE_PCID);
 		return;
@@ -643,8 +642,13 @@ static void __init memory_map_top_down(unsigned long map_start,
 	 */
 	addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
 					 map_end);
-	memblock_phys_free(addr, PMD_SIZE);
-	real_end = addr + PMD_SIZE;
+	if (!addr) {
+		pr_warn("Failed to release memory for alloc_low_pages()");
+		real_end = max(map_start, ALIGN_DOWN(map_end, PMD_SIZE));
+	} else {
+		memblock_phys_free(addr, PMD_SIZE);
+		real_end = addr + PMD_SIZE;
+	}
 
 	/* step_size need to be small so pgt_buf from BRK could cover it */
 	step_size = PMD_SIZE;
@@ -817,31 +821,33 @@ void __init poking_init(void)
 	spinlock_t *ptl;
 	pte_t *ptep;
 
-	poking_mm = mm_alloc();
-	BUG_ON(!poking_mm);
+	text_poke_mm = mm_alloc();
+	BUG_ON(!text_poke_mm);
 
 	/* Xen PV guests need the PGD to be pinned. */
-	paravirt_enter_mmap(poking_mm);
+	paravirt_enter_mmap(text_poke_mm);
+
+	set_notrack_mm(text_poke_mm);
 
 	/*
 	 * Randomize the poking address, but make sure that the following page
 	 * will be mapped at the same PMD. We need 2 pages, so find space for 3,
 	 * and adjust the address if the PMD ends after the first one.
 	 */
-	poking_addr = TASK_UNMAPPED_BASE;
+	text_poke_mm_addr = TASK_UNMAPPED_BASE;
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
-		poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+		text_poke_mm_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
			(TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
 
-	if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
-		poking_addr += PAGE_SIZE;
+	if (((text_poke_mm_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+		text_poke_mm_addr += PAGE_SIZE;
 
 	/*
 	 * We need to trigger the allocation of the page-tables that will be
 	 * needed for poking now. Later, poking may be performed in an atomic
 	 * section, which might cause allocation to fail.
 	 */
-	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+	ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);
 	BUG_ON(!ptep);
 	pte_unmap_unlock(ptep, ptl);
 }
@@ -990,53 +996,6 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-/*
- * Calculate the precise size of the DMA zone (first 16 MB of RAM),
- * and pass it to the MM layer - to help it set zone watermarks more
- * accurately.
- *
- * Done on 64-bit systems only for the time being, although 32-bit systems
- * might benefit from this as well.
- */
-void __init memblock_find_dma_reserve(void)
-{
-#ifdef CONFIG_X86_64
-	u64 nr_pages = 0, nr_free_pages = 0;
-	unsigned long start_pfn, end_pfn;
-	phys_addr_t start_addr, end_addr;
-	int i;
-	u64 u;
-
-	/*
-	 * Iterate over all memory ranges (free and reserved ones alike),
-	 * to calculate the total number of pages in the first 16 MB of RAM:
-	 */
-	nr_pages = 0;
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
-		start_pfn = min(start_pfn, MAX_DMA_PFN);
-		end_pfn = min(end_pfn, MAX_DMA_PFN);
-
-		nr_pages += end_pfn - start_pfn;
-	}
-
-	/*
-	 * Iterate over free memory ranges to calculate the number of free
-	 * pages in the DMA zone, while not counting potential partial
-	 * pages at the beginning or the end of the range:
-	 */
-	nr_free_pages = 0;
-	for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
-		start_pfn = min_t(unsigned long, PFN_UP(start_addr), MAX_DMA_PFN);
-		end_pfn = min_t(unsigned long, PFN_DOWN(end_addr), MAX_DMA_PFN);
-
-		if (start_pfn < end_pfn)
-			nr_free_pages += end_pfn - start_pfn;
-	}
-
-	set_dma_reserve(nr_pages - nr_free_pages);
-#endif
-}
-
 void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -1099,3 +1058,67 @@ unsigned long arch_max_swapfile_size(void)
 	return pages;
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable)
+{
+	/* fill memory with INT3 instructions */
+	if (writeable)
+		memset(ptr, INT3_INSN_OPCODE, size);
+	else
+		text_poke_set(ptr, INT3_INSN_OPCODE, size);
+}
+#endif
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+	unsigned long start, offset = 0;
+	enum execmem_range_flags flags;
+	pgprot_t pgprot;
+
+	if (kaslr_enabled())
+		offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+
+	start = MODULES_VADDR + offset;
+
+	if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) &&
+	    cpu_feature_enabled(X86_FEATURE_PSE)) {
+		pgprot = PAGE_KERNEL_ROX;
+		flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE;
+	} else {
+		pgprot = PAGE_KERNEL;
+		flags = EXECMEM_KASAN_SHADOW;
+	}
+
+	execmem_info = (struct execmem_info){
+		.ranges = {
+			[EXECMEM_MODULE_TEXT] = {
+				.flags     = flags,
+				.start     = start,
+				.end       = MODULES_END,
+				.pgprot    = pgprot,
+				.alignment = MODULE_ALIGN,
+			},
+			[EXECMEM_KPROBES ... EXECMEM_BPF] = {
+				.flags     = EXECMEM_KASAN_SHADOW,
+				.start     = start,
+				.end       = MODULES_END,
+				.pgprot    = PAGE_KERNEL,
+				.alignment = MODULE_ALIGN,
+			},
+			[EXECMEM_MODULE_DATA] = {
+				.flags     = EXECMEM_KASAN_SHADOW,
+				.start     = start,
+				.end       = MODULES_END,
+				.pgprot    = PAGE_KERNEL,
+				.alignment = MODULE_ALIGN,
+			},
+		},
+	};
+
+	return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */