diff options
Diffstat (limited to 'arch/arm64/include/asm/memory.h')
| -rw-r--r-- | arch/arm64/include/asm/memory.h | 370 |
1 files changed, 240 insertions, 130 deletions
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index e1ec947e7c0c..9d54b2ea49d6 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -1,32 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Based on arch/arm/include/asm/memory.h * * Copyright (C) 2000-2002 Russell King * Copyright (C) 2012 ARM Ltd. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * * Note: this file should not be included by non-asm/.h files */ #ifndef __ASM_MEMORY_H #define __ASM_MEMORY_H -#include <linux/compiler.h> #include <linux/const.h> -#include <linux/types.h> -#include <asm/bug.h> +#include <linux/sizes.h> #include <asm/page-def.h> -#include <asm/sizes.h> /* * Size of the PCI I/O space. This must remain a power of two so that @@ -37,58 +23,94 @@ /* * VMEMMAP_SIZE - allows the whole linear region to be covered by * a struct page array + * + * If we are configured with a 52-bit kernel VA then our VMEMMAP_SIZE + * needs to cover the memory region from the beginning of the 52-bit + * PAGE_OFFSET all the way to PAGE_END for 48-bit. This allows us to + * keep a constant PAGE_OFFSET and "fallback" to using the higher end + * of the VMEMMAP where 52-bit support is not available in hardware. */ -#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)) +#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) +#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page)) /* - * PAGE_OFFSET - the virtual address of the start of the linear map (top - * (VA_BITS - 1)) - * KIMAGE_VADDR - the virtual address of the start of the kernel image + * PAGE_OFFSET - the virtual address of the start of the linear map, at the + * start of the TTBR1 address space. + * PAGE_END - the end of the linear map, where all other kernel mappings begin. + * KIMAGE_VADDR - the virtual address of the start of the kernel image. * VA_BITS - the maximum number of bits for virtual addresses. - * VA_START - the first kernel virtual address. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define VA_START (UL(0xffffffffffffffff) - \ - (UL(1) << VA_BITS) + 1) -#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ - (UL(1) << (VA_BITS - 1)) + 1) +#define _PAGE_OFFSET(va) (-(UL(1) << (va))) +#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) #define KIMAGE_VADDR (MODULES_END) -#define BPF_JIT_REGION_START (VA_START + KASAN_SHADOW_SIZE) -#define BPF_JIT_REGION_SIZE (SZ_128M) -#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) -#define MODULES_VADDR (BPF_JIT_REGION_END) -#define MODULES_VSIZE (SZ_128M) -#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) -#define PCI_IO_END (VMEMMAP_START - SZ_2M) -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) -#define FIXADDR_TOP (PCI_IO_START - SZ_2M) - -#define KERNEL_START _text -#define KERNEL_END _end - -#ifdef CONFIG_ARM64_USER_VA_BITS_52 -#define MAX_USER_VA_BITS 52 +#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) +#define MODULES_VSIZE (SZ_2G) +#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE) +#define VMEMMAP_END (-UL(SZ_1G)) +#define PCI_IO_START (VMEMMAP_END + SZ_8M) +#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE) +#define FIXADDR_TOP (-UL(SZ_8M)) + +#if VA_BITS > 48 +#ifdef CONFIG_ARM64_16K_PAGES +#define VA_BITS_MIN (47) +#else +#define VA_BITS_MIN (48) +#endif #else -#define MAX_USER_VA_BITS VA_BITS +#define VA_BITS_MIN (VA_BITS) #endif +#define _PAGE_END(va) (-(UL(1) << ((va) - 1))) + +#define KERNEL_START _text +#define KERNEL_END _end + /* - * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual - * address space for the shadow region respectively. They can bloat the stack - * significantly, so double the (minimum) stack size when they are in use. + * Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the + * kernel virtual address space for storing the shadow memory respectively. + * + * The mapping between a virtual memory address and its corresponding shadow + * memory address is defined based on the formula: + * + * shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET + * + * where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map + * to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets + * the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of + * the shadow memory region. + * + * Based on this mapping, we define two constants: + * + * KASAN_SHADOW_START: the start of the shadow memory region; + * KASAN_SHADOW_END: the end of the shadow memory region. + * + * KASAN_SHADOW_END is defined first as the shadow address that corresponds to + * the upper bound of possible virtual kernel memory addresses UL(1) << 64 + * according to the mapping formula. + * + * KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow + * memory start must map to the lowest possible kernel virtual memory address + * and thus it depends on the actual bitness of the address space. + * + * As KASAN inserts redzones between stack variables, this increases the stack + * memory usage significantly. Thus, we double the (minimum) stack size. */ -#ifdef CONFIG_KASAN -#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) -#ifdef CONFIG_KASAN_EXTRA -#define KASAN_THREAD_SHIFT 2 -#else +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET) +#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) +#define PAGE_END KASAN_SHADOW_START #define KASAN_THREAD_SHIFT 1 -#endif /* CONFIG_KASAN_EXTRA */ #else -#define KASAN_SHADOW_SIZE (0) #define KASAN_THREAD_SHIFT 0 -#endif +#define PAGE_END (_PAGE_END(VA_BITS_MIN)) +#endif /* CONFIG_KASAN */ + +#define DIRECT_MAP_PHYSMEM_END __pa(PAGE_END - 1) #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) @@ -96,7 +118,7 @@ * VMAP'd stacks are allocated at page granularity, so we must ensure that such * stacks are a multiple of page size. */ -#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) +#if (MIN_THREAD_SHIFT < PAGE_SHIFT) #define THREAD_SHIFT PAGE_SHIFT #else #define THREAD_SHIFT MIN_THREAD_SHIFT @@ -113,49 +135,50 @@ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry * assembly. */ -#ifdef CONFIG_VMAP_STACK #define THREAD_ALIGN (2 * THREAD_SIZE) -#else -#define THREAD_ALIGN THREAD_SIZE -#endif #define IRQ_STACK_SIZE THREAD_SIZE #define OVERFLOW_STACK_SIZE SZ_4K +#define NVHE_STACK_SHIFT PAGE_SHIFT +#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT) + /* - * Alignment of kernel segments (e.g. .text, .data). - */ -#if defined(CONFIG_DEBUG_ALIGN_RODATA) -/* - * 4 KB granule: 1 level 2 entry - * 16 KB granule: 128 level 3 entries, with contiguous bit - * 64 KB granule: 32 level 3 entries, with contiguous bit + * With the minimum frame size of [x29, x30], exactly half the combined + * sizes of the hyp and overflow stacks is the maximum size needed to + * save the unwinded stacktrace; plus an additional entry to delimit the + * end. */ -#define SEGMENT_ALIGN SZ_2M -#else +#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long)) + /* + * Alignment of kernel segments (e.g. .text, .data). + * * 4 KB granule: 16 level 3 entries, with contiguous bit * 16 KB granule: 4 level 3 entries, without contiguous bit * 64 KB granule: 1 level 3 entry */ -#define SEGMENT_ALIGN SZ_64K -#endif +#define SEGMENT_ALIGN SZ_64K /* * Memory types available. + * + * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in + * the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note + * that protection_map[] only contains MT_NORMAL attributes. */ -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 +#define MT_NORMAL 0 +#define MT_NORMAL_TAGGED 1 +#define MT_NORMAL_NC 2 +#define MT_DEVICE_nGnRnE 3 +#define MT_DEVICE_nGnRE 4 /* * Memory types for Stage-2 translation */ #define MT_S2_NORMAL 0xf +#define MT_S2_NORMAL_NC 0x5 #define MT_S2_DEVICE_nGnRE 0x1 /* @@ -163,6 +186,7 @@ * Stage-2 enforces Normal-WB and Device-nGnRE */ #define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_NORMAL_NC 5 #define MT_S2_FWB_DEVICE_nGnRE 1 #ifdef CONFIG_ARM64_4K_PAGES @@ -171,28 +195,68 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif -#ifndef __ASSEMBLY__ +/* + * Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated + * until link time. + */ +#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE) + +/* + * Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated + * until link time. + */ +#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE) + +#ifndef __ASSEMBLER__ #include <linux/bitops.h> +#include <linux/compiler.h> #include <linux/mmdebug.h> +#include <linux/types.h> +#include <asm/boot.h> +#include <asm/bug.h> +#include <asm/sections.h> +#include <asm/sysreg.h> + +static inline u64 __pure read_tcr(void) +{ + u64 tcr; + + // read_sysreg() uses asm volatile, so avoid it here + asm("mrs %0, tcr_el1" : "=r"(tcr)); + return tcr; +} + +#if VA_BITS > 48 +// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here +#define vabits_actual (64 - ((read_tcr() >> 16) & 63)) +#else +#define vabits_actual ((u64)VA_BITS) +#endif extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image (minus TEXT_OFFSET) */ -extern u64 kimage_vaddr; - /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; static inline unsigned long kaslr_offset(void) { - return kimage_vaddr - KIMAGE_VADDR; + return (u64)&_text - KIMAGE_VADDR; } -/* the actual size of a user virtual address */ -extern u64 vabits_user; +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_init(void); +static inline bool kaslr_enabled(void) +{ + extern bool __kaslr_is_enabled; + return __kaslr_is_enabled; +} +#else +static inline void kaslr_init(void) { } +static inline bool kaslr_enabled(void) { return false; } +#endif /* * Allow all memory at the discovery stage. We will clip it later. @@ -215,20 +279,44 @@ extern u64 vabits_user; * up with a tagged userland pointer. Clear the tag to get a sane pointer to * pass on to access_ok(), for instance. */ -#define untagged_addr(addr) \ - ((__typeof__(addr))sign_extend64((u64)(addr), 55)) +#define __untagged_addr(addr) \ + ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) -#ifdef CONFIG_KASAN_SW_TAGS +#define untagged_addr(addr) ({ \ + u64 __addr = (__force u64)(addr); \ + __addr &= __untagged_addr(__addr); \ + (__force __typeof__(addr))__addr; \ +}) + +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_set(addr, tag) (__typeof__(addr))( \ - ((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag)) -#define __tag_reset(addr) untagged_addr(addr) +#define __tag_reset(addr) __untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else -#define __tag_set(addr, tag) (addr) +#define __tag_shifted(tag) 0UL #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 -#endif +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ + +static inline const void *__tag_set(const void *addr, u8 tag) +{ + u64 __addr = (u64)addr & ~__tag_shifted(0xff); + return (const void *)(__addr | __tag_shifted(tag)); +} + +#ifdef CONFIG_KASAN_HW_TAGS +#define arch_enable_tag_checks_sync() mte_enable_kernel_sync() +#define arch_enable_tag_checks_async() mte_enable_kernel_async() +#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm() +#define arch_enable_tag_checks_write_only() mte_enable_kernel_store_only() +#define arch_suppress_tag_checks_start() mte_enable_tco() +#define arch_suppress_tag_checks_stop() mte_disable_tco() +#define arch_force_async_tag_fault() mte_check_tfsr_exit() +#define arch_get_random_tag() mte_get_random_tag() +#define arch_get_mem_tag(addr) mte_get_mem_tag(addr) +#define arch_set_mem_tag_range(addr, size, tag, init) \ + mte_set_mem_tag_range((addr), (size), (tag), (init)) +#endif /* CONFIG_KASAN_HW_TAGS */ /* * Physical vs virtual RAM address space conversion. These are @@ -238,19 +326,18 @@ extern u64 vabits_user; /* - * The linear kernel range starts in the middle of the virtual adddress - * space. Testing the top bit for the start of the region is a - * sufficient check. + * Check whether an arbitrary address is within the linear map, which + * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the + * kernel's TTBR1 address range. */ -#define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1))) +#define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) -#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) +#define __lm_to_phys(addr) (((addr) - PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) #define __virt_to_phys_nodebug(x) ({ \ - phys_addr_t __x = (phys_addr_t)(x); \ - __is_lm_address(__x) ? __lm_to_phys(__x) : \ - __kimg_to_phys(__x); \ + phys_addr_t __x = (phys_addr_t)(__tag_reset(x)); \ + __is_lm_address(__x) ? __lm_to_phys(__x) : __kimg_to_phys(__x); \ }) #define __pa_symbol_nodebug(x) __kimg_to_phys((phys_addr_t)(x)) @@ -261,18 +348,12 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #else #define __virt_to_phys(x) __virt_to_phys_nodebug(x) #define __phys_addr_symbol(x) __pa_symbol_nodebug(x) -#endif +#endif /* CONFIG_DEBUG_VIRTUAL */ #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* - * Convert a page to/from a physical address - */ -#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) -#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) - -/* * Note: Drivers should NOT use these. They are the wrong * translation for translating DMA addresses. Use the driver * DMA support - see dma-mapping.h. @@ -289,6 +370,14 @@ static inline void *phys_to_virt(phys_addr_t x) return (void *)(__phys_to_virt(x)); } +/* Needed already here for resolving __phys_to_pfn() in virt_to_pfn() */ +#include <asm-generic/memory_model.h> + +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __phys_to_pfn(virt_to_phys(kaddr)); +} + /* * Drivers should NOT use these either. */ @@ -297,41 +386,62 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) -#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) /* - * virt_to_page(k) convert a _valid_ virtual address to struct page * - * virt_addr_valid(k) indicates whether a virtual address is valid + * virt_to_page(x) convert a _valid_ virtual address to struct page * + * virt_addr_valid(x) indicates whether a virtual address is valid */ -#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) -#ifndef CONFIG_SPARSEMEM_VMEMMAP -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#if defined(CONFIG_DEBUG_VIRTUAL) +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + void *__addr = __va(page_to_phys(__page)); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ +}) +#define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else -#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) - -#define page_to_virt(page) ({ \ - unsigned long __addr = \ - ((__page_to_voff(page)) | PAGE_OFFSET); \ - __addr = __tag_set(__addr, page_kasan_tag(page)); \ - ((void *)__addr); \ +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + u64 __idx = ((u64)__page - VMEMMAP_START) / sizeof(struct page);\ + u64 __addr = PAGE_OFFSET + (__idx * PAGE_SIZE); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ }) -#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) +#define virt_to_page(x) ({ \ + u64 __idx = (__tag_reset((u64)x) - PAGE_OFFSET) / PAGE_SIZE; \ + u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page)); \ + (struct page *)__addr; \ +}) +#endif /* CONFIG_DEBUG_VIRTUAL */ -#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ - + PHYS_OFFSET) >> PAGE_SHIFT) -#endif -#endif +#define virt_addr_valid(addr) ({ \ + __typeof__(addr) __addr = __tag_reset(addr); \ + __is_lm_address(__addr) && pfn_is_map_memory(virt_to_pfn(__addr)); \ +}) -#define _virt_addr_is_linear(kaddr) \ - (__tag_reset((u64)(kaddr)) >= PAGE_OFFSET) -#define virt_addr_valid(kaddr) \ - (_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr)) +void dump_mem_limit(void); +#endif /* !__ASSEMBLER__ */ -#include <asm-generic/memory_model.h> +/* + * Given that the GIC architecture permits ITS implementations that can only be + * configured with a LPI table address once, GICv3 systems with many CPUs may + * end up reserving a lot of different regions after a kexec for their LPI + * tables (one per CPU), as we are forced to reuse the same memory after kexec + * (and thus reserve it persistently with EFI beforehand) + */ +#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS) +# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1) +#endif +/* + * memory regions which marked with flag MEMBLOCK_NOMAP(for example, the memory + * of the EFI_UNUSABLE_MEMORY type) may divide a continuous memory block into + * multiple parts. As a result, the number of memory regions is large. + */ +#ifdef CONFIG_EFI +#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8) #endif + + +#endif /* __ASM_MEMORY_H */ |
