summaryrefslogtreecommitdiff
path: root/arch/arm64/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--arch/arm64/include/asm/archrandom.h2
-rw-r--r--arch/arm64/include/asm/assembler.h55
-rw-r--r--arch/arm64/include/asm/cpufeature.h116
-rw-r--r--arch/arm64/include/asm/esr.h13
-rw-r--r--arch/arm64/include/asm/fixmap.h2
-rw-r--r--arch/arm64/include/asm/kasan.h2
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h103
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h10
-rw-r--r--arch/arm64/include/asm/memory.h17
-rw-r--r--arch/arm64/include/asm/mman.h36
-rw-r--r--arch/arm64/include/asm/mmu.h40
-rw-r--r--arch/arm64/include/asm/mmu_context.h83
-rw-r--r--arch/arm64/include/asm/pgalloc.h52
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h33
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h20
-rw-r--r--arch/arm64/include/asm/pgtable-types.h6
-rw-r--r--arch/arm64/include/asm/pgtable.h225
-rw-r--r--arch/arm64/include/asm/scs.h36
-rw-r--r--arch/arm64/include/asm/setup.h3
-rw-r--r--arch/arm64/include/asm/tlb.h3
20 files changed, 621 insertions, 236 deletions
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index ecdb3cfcd0f8..8babfbe31f95 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -129,6 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void)
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
-u64 kaslr_early_init(void *fdt);
-
#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 96b18a707507..ab8b396428da 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -342,20 +342,6 @@ alternative_cb_end
.endm
/*
- * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
- *
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of _end.
- */
- .macro idmap_get_t0sz, reg
- adrp \reg, _end
- orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
- clz \reg, \reg
- .endm
-
-/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
*
@@ -586,18 +572,27 @@ alternative_endif
.endm
/*
- * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
+ * If the kernel is built for 52-bit virtual addressing but the hardware only
+ * supports 48 bits, we cannot program the pgdir address into TTBR1 directly,
+ * but we have to add an offset so that the TTBR1 address corresponds with the
+ * pgdir entry that covers the lowest 48-bit addressable VA.
+ *
+ * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
+ * additional paging level, and on LPA2/16k pages, we would end up with a root
+ * level table with only 2 entries, which is suboptimal in terms of TLB
+ * utilization, so there we fall back to 47 bits of translation if LPA2 is not
+ * supported.
+ *
* orr is used as it can cover the immediate value (and is idempotent).
- * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
* ttbr: Value of ttbr to set, modified.
*/
.macro offset_ttbr1, ttbr, tmp
-#ifdef CONFIG_ARM64_VA_BITS_52
- mrs_s \tmp, SYS_ID_AA64MMFR2_EL1
- and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
- cbnz \tmp, .Lskipoffs_\@
- orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
-.Lskipoffs_\@ :
+#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
+ mrs \tmp, tcr_el1
+ and \tmp, \tmp, #TCR_T1SZ_MASK
+ cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
+ orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
+ csel \ttbr, \tmp, \ttbr, eq
#endif
.endm
@@ -619,25 +614,13 @@ alternative_endif
.macro phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
- /*
- * We assume \phys is 64K aligned and this is guaranteed by only
- * supporting this configuration with 64K pages.
- */
- orr \pte, \phys, \phys, lsr #36
- and \pte, \pte, #PTE_ADDR_MASK
+ orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT
+ and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK
#else
mov \pte, \phys
#endif
.endm
- .macro pte_to_phys, phys, pte
- and \phys, \pte, #PTE_ADDR_MASK
-#ifdef CONFIG_ARM64_PA_BITS_52
- orr \phys, \phys, \phys, lsl #PTE_ADDR_HIGH_SHIFT
- and \phys, \phys, GENMASK_ULL(PHYS_MASK_SHIFT - 1, PAGE_SHIFT)
-#endif
- .endm
-
/*
* tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU.
*/
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 34fcdbc65d7d..66ba0801f7b7 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -17,6 +17,8 @@
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
+#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8
+#define ARM64_SW_FEATURE_OVERRIDE_NOWXN 12
#ifndef __ASSEMBLY__
@@ -910,7 +912,9 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+extern struct arm64_ftr_override id_aa64mmfr0_override;
extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64mmfr2_override;
extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64zfr0_override;
@@ -920,9 +924,121 @@ extern struct arm64_ftr_override id_aa64isar2_override;
extern struct arm64_ftr_override arm64_sw_feature_override;
+static inline
+u64 arm64_apply_feature_override(u64 val, int feat, int width,
+ const struct arm64_ftr_override *override)
+{
+ u64 oval = override->val;
+
+ /*
+ * When it encounters an invalid override (e.g., an override that
+ * cannot be honoured due to a missing CPU feature), the early idreg
+ * override code will set the mask to 0x0 and the value to non-zero for
+ * the field in question. In order to determine whether the override is
+ * valid or not for the field we are interested in, we first need to
+ * disregard bits belonging to other fields.
+ */
+ oval &= GENMASK_ULL(feat + width - 1, feat);
+
+ /*
+ * The override is valid if all value bits are accounted for in the
+ * mask. If so, replace the masked bits with the override value.
+ */
+ if (oval == (oval & override->mask)) {
+ val &= ~override->mask;
+ val |= oval;
+ }
+
+ /* Extract the field from the updated value */
+ return cpuid_feature_extract_unsigned_field(val, feat);
+}
+
+static inline bool arm64_test_sw_feature_override(int feat)
+{
+ /*
+ * Software features are pseudo CPU features that have no underlying
+ * CPUID system register value to apply the override to.
+ */
+ return arm64_apply_feature_override(0, feat, 4,
+ &arm64_sw_feature_override);
+}
+
+static inline bool kaslr_disabled_cmdline(void)
+{
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR);
+}
+
+static inline bool arm64_wxn_enabled(void)
+{
+ if (!IS_ENABLED(CONFIG_ARM64_WXN))
+ return false;
+ return !arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOWXN);
+}
+
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
+static inline bool cpu_has_bti(void)
+{
+ if (!IS_ENABLED(CONFIG_ARM64_BTI))
+ return false;
+
+ return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1),
+ ID_AA64PFR1_EL1_BT_SHIFT, 4,
+ &id_aa64pfr1_override);
+}
+
+static inline bool cpu_has_pac(void)
+{
+ u64 isar1, isar2;
+
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
+
+ isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+ isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4,
+ &id_aa64isar2_override);
+}
+
+static inline bool cpu_has_lva(void)
+{
+ u64 mmfr2;
+
+ mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ mmfr2 &= ~id_aa64mmfr2_override.mask;
+ mmfr2 |= id_aa64mmfr2_override.val;
+ return cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_VARange_SHIFT);
+}
+
+static inline bool cpu_has_lpa2(void)
+{
+#ifdef CONFIG_ARM64_LPA2
+ u64 mmfr0;
+ int feat;
+
+ mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ mmfr0 &= ~id_aa64mmfr0_override.mask;
+ mmfr0 |= id_aa64mmfr0_override.val;
+ feat = cpuid_feature_extract_signed_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+
+ return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
+#else
+ return false;
+#endif
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 353fe08546cf..81606bf7d5ac 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -117,15 +117,9 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
-#define ESR_ELx_FSC_SEA_TTW0 (0x14)
-#define ESR_ELx_FSC_SEA_TTW1 (0x15)
-#define ESR_ELx_FSC_SEA_TTW2 (0x16)
-#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
#define ESR_ELx_FSC_SECC (0x18)
-#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
-#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
-#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
-#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
+#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
@@ -394,6 +388,9 @@ static inline bool esr_is_data_abort(unsigned long esr)
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
+ /* Translation fault, level -1 */
+ if ((esr & ESR_ELx_FSC) == 0b101011)
+ return true;
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 58c294a96676..87e307804b99 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -87,6 +87,7 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
+ FIX_P4D,
FIX_PGD,
__end_of_fixed_addresses
@@ -100,7 +101,6 @@ enum fixed_addresses {
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
void __init early_fixmap_init(void);
-void __init fixmap_copy(pgd_t *pgdir);
#define __early_set_fixmap __set_fixmap
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 7eefc525a9df..e1b57c13f8a4 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -17,11 +17,9 @@
asmlinkage void kasan_early_init(void);
void kasan_init(void);
-void kasan_copy_shadow(pgd_t *pgdir);
#else
static inline void kasan_init(void) { }
-static inline void kasan_copy_shadow(pgd_t *pgdir) { }
#endif
#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 83ddb14b95a5..bf05a77873a4 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -13,28 +13,27 @@
#include <asm/sparsemem.h>
/*
- * The linear mapping and the start of memory are both 2M aligned (per
- * the arm64 booting.txt requirements). Hence we can use section mapping
- * with 4K (section size = 2M) but not with 16K (section size = 32M) or
- * 64K (section size = 512M).
+ * The physical and virtual addresses of the start of the kernel image are
+ * equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can
+ * use section mapping with 4K (section size = 2M) but not with 16K (section
+ * size = 32M) or 64K (section size = 512M).
*/
-
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so pages required to map highest possible PA are reserved in all
- * cases.
- */
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
+#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN
+#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
+#define SWAPPER_SKIP_LEVEL 1
#else
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
+#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
+#define SWAPPER_SKIP_LEVEL 0
#endif
+#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT)
+#define SWAPPER_TABLE_SHIFT (SWAPPER_BLOCK_SHIFT + PAGE_SHIFT - 3)
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
+#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
+
+#define IDMAP_VA_BITS 48
+#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
+#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
/*
* A relocatable kernel may execute from an address that differs from the one at
@@ -50,57 +49,39 @@
#define EARLY_ENTRIES(vstart, vend, shift, add) \
(SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
-#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
-
-#if SWAPPER_PGTABLE_LEVELS > 3
-#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
-#else
-#define EARLY_PUDS(vstart, vend, add) (0)
-#endif
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+ (lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
-#if SWAPPER_PGTABLE_LEVELS > 2
-#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add))
-#else
-#define EARLY_PMDS(vstart, vend, add) (0)
-#endif
+#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
+#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ + EARLY_SEGMENT_EXTRA_PAGES))
-#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \
- + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
- + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
- + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
+#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
-/* the initial ID map may need two extra pages if it needs to be extended */
-#if VA_BITS < 48
-#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
-#else
-#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
-#endif
-#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1)
+#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
+#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
-/* Initial memory map size */
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
-#define SWAPPER_BLOCK_SIZE PMD_SIZE
-#define SWAPPER_TABLE_SHIFT PUD_SHIFT
-#else
-#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
-#define SWAPPER_BLOCK_SIZE PAGE_SIZE
-#define SWAPPER_TABLE_SHIFT PMD_SHIFT
-#endif
+/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
+#define KERNEL_SEGMENT_COUNT 5
+#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
+#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
/*
- * Initial memory map attributes.
+ * The initial ID map consists of the kernel image, mapped as two separate
+ * segments, and may appear misaligned wrt the swapper block size. This means
+ * we need 3 additional pages. The DT could straddle a swapper block boundary,
+ * so it may need 2.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
-
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
-#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
+#define EARLY_IDMAP_EXTRA_PAGES 3
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 2
#else
-#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
-#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
+#define EARLY_SEGMENT_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 0
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index b804fe832184..6f5b41c70103 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -425,15 +425,9 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case ESR_ELx_FSC_EXTABT:
- case ESR_ELx_FSC_SEA_TTW0:
- case ESR_ELx_FSC_SEA_TTW1:
- case ESR_ELx_FSC_SEA_TTW2:
- case ESR_ELx_FSC_SEA_TTW3:
+ case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
case ESR_ELx_FSC_SECC:
- case ESR_ELx_FSC_SECC_TTW0:
- case ESR_ELx_FSC_SECC_TTW1:
- case ESR_ELx_FSC_SECC_TTW2:
- case ESR_ELx_FSC_SECC_TTW3:
+ case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
return true;
default:
return false;
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 60904a6c4b42..b850b1b91471 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -54,7 +54,11 @@
#define FIXADDR_TOP (-UL(SZ_8M))
#if VA_BITS > 48
+#ifdef CONFIG_ARM64_16K_PAGES
+#define VA_BITS_MIN (47)
+#else
#define VA_BITS_MIN (48)
+#endif
#else
#define VA_BITS_MIN (VA_BITS)
#endif
@@ -209,9 +213,20 @@
#include <asm/boot.h>
#include <asm/bug.h>
#include <asm/sections.h>
+#include <asm/sysreg.h>
+
+static inline u64 __pure read_tcr(void)
+{
+ u64 tcr;
+
+ // read_sysreg() uses asm volatile, so avoid it here
+ asm("mrs %0, tcr_el1" : "=r"(tcr));
+ return tcr;
+}
#if VA_BITS > 48
-extern u64 vabits_actual;
+// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here
+#define vabits_actual (64 - ((read_tcr() >> 16) & 63))
#else
#define vabits_actual ((u64)VA_BITS)
#endif
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 5966ee4a6154..6d4940342ba7 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -35,11 +35,40 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
}
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+static inline bool arm64_check_wx_prot(unsigned long prot,
+ struct task_struct *tsk)
+{
+ /*
+ * When we are running with SCTLR_ELx.WXN==1, writable mappings are
+ * implicitly non-executable. This means we should reject such mappings
+ * when user space attempts to create them using mmap() or mprotect().
+ */
+ if (arm64_wxn_enabled() &&
+ ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))) {
+ /*
+ * User space libraries such as libffi carry elaborate
+ * heuristics to decide whether it is worth it to even attempt
+ * to create writable executable mappings, as PaX or selinux
+ * enabled systems will outright reject it. They will usually
+ * fall back to something else (e.g., two separate shared
+ * mmap()s of a temporary file) on failure.
+ */
+ pr_info_ratelimited(
+ "process %s (%d) attempted to create PROT_WRITE+PROT_EXEC mapping\n",
+ tsk->comm, tsk->pid);
+ return false;
+ }
+ return true;
+}
+
static inline bool arch_validate_prot(unsigned long prot,
unsigned long addr __always_unused)
{
unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM;
+ if (!arm64_check_wx_prot(prot, current))
+ return false;
+
if (system_supports_bti())
supported |= PROT_BTI;
@@ -50,6 +79,13 @@ static inline bool arch_validate_prot(unsigned long prot,
}
#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
+static inline bool arch_validate_mmap_prot(unsigned long prot,
+ unsigned long addr)
+{
+ return arm64_check_wx_prot(prot, current);
+}
+#define arch_validate_mmap_prot arch_validate_mmap_prot
+
static inline bool arch_validate_flags(unsigned long vm_flags)
{
if (!system_supports_mte())
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2fcf51231d6e..65977c7783c5 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -71,10 +71,46 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
-extern bool kaslr_requires_kpti(void);
+
+/*
+ * This check is triggered during the early boot before the cpufeature
+ * is initialised. Checking the status on the local CPU allows the boot
+ * CPU to detect the need for non-global mappings and thus avoiding a
+ * pagetable re-write after all the CPUs are booted. This check will be
+ * anyway run on individual CPUs, allowing us to get the consistent
+ * state once the SMP CPUs are up and thus make the switch to non-global
+ * mappings if required.
+ */
+static inline bool kaslr_requires_kpti(void)
+{
+ /*
+ * E0PD does a similar job to KPTI so can be used instead
+ * where available.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+ u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ if (cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_E0PD_SHIFT))
+ return false;
+ }
+
+ /*
+ * Systems affected by Cavium erratum 24756 are incompatible
+ * with KPTI.
+ */
+ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
+ extern const struct midr_range cavium_erratum_27456_cpus[];
+
+ if (is_midr_in_range_list(read_cpuid_id(),
+ cavium_erratum_27456_cpus))
+ return false;
+ }
+
+ return true;
+}
#define INIT_MM_CONTEXT(name) \
- .pgd = init_pg_dir,
+ .pgd = swapper_pg_dir,
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 9ce4200508b1..f0fe2d09d139 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -20,13 +20,41 @@
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
extern bool rodata_full;
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ if (IS_ENABLED(CONFIG_ARM64_WXN) && execute &&
+ (vma->vm_flags & (VM_WRITE | VM_EXEC)) == (VM_WRITE | VM_EXEC)) {
+ pr_warn_ratelimited(
+ "process %s (%d) attempted to execute from writable memory\n",
+ current->comm, current->pid);
+ /* disallow unless the nowxn override is set */
+ return !arm64_wxn_enabled();
+ }
+ return true;
+}
+
static inline void contextidr_thread_switch(struct task_struct *next)
{
if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
@@ -61,11 +89,9 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
}
/*
- * TCR.T0SZ value to use when the ID map is active. Usually equals
- * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
- * physical memory, in which case it will be smaller.
+ * TCR.T0SZ value to use when the ID map is active.
*/
-extern int idmap_t0sz;
+#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS)
/*
* Ensure TCR.T0SZ is set to the provided value.
@@ -110,18 +136,13 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
-static inline void __cpu_install_idmap(pgd_t *idmap)
+static inline void cpu_install_idmap(void)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(lm_alias(idmap), &init_mm);
-}
-
-static inline void cpu_install_idmap(void)
-{
- __cpu_install_idmap(idmap_pg_dir);
+ cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
@@ -148,51 +169,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
isb();
}
-/*
- * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
- * avoiding the possibility of conflicting TLB entries being allocated.
- */
-static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
-{
- typedef void (ttbr_replace_func)(phys_addr_t);
- extern ttbr_replace_func idmap_cpu_replace_ttbr1;
- ttbr_replace_func *replace_phys;
- unsigned long daif;
-
- /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
- phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
-
- if (cnp)
- ttbr1 |= TTBR_CNP_BIT;
-
- replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
-
- __cpu_install_idmap(idmap);
-
- /*
- * We really don't want to take *any* exceptions while TTBR1 is
- * in the process of being replaced so mask everything.
- */
- daif = local_daif_save();
- replace_phys(ttbr1);
- local_daif_restore(daif);
-
- cpu_uninstall_idmap();
-}
+void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
static inline void cpu_enable_swapper_cnp(void)
{
- __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
+ __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
}
-static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
/*
* Only for early TTBR1 replacement before cpucaps are finalized and
* before we've decided whether to use CNP.
*/
WARN_ON(system_capabilities_finalized());
- __cpu_replace_ttbr1(pgdp, idmap, false);
+ __cpu_replace_ttbr1(pgdp, false);
}
/*
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 237224484d0f..8ff5f2a2579e 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -14,6 +14,7 @@
#include <asm/tlbflush.h>
#define __HAVE_ARCH_PGD_FREE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
@@ -43,7 +44,8 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
- set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
@@ -53,6 +55,13 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
__p4d_populate(p4dp, __pa(pudp), p4dval);
}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!pgtable_l4_enabled())
+ return;
+ __pud_free(mm, pud);
+}
#else
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
@@ -60,6 +69,47 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
+{
+ pgdval_t pgdval = PGD_TYPE_TABLE;
+
+ pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
+ __pgd_populate(pgdp, __pa(p4dp), pgdval);
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (!pgtable_l5_enabled())
+ return;
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index e4944d517c99..ef207a0d4f0d 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -26,10 +26,10 @@
#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
/*
- * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * Size mapped by an entry at level n ( -1 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at level n, we have further
+ * the architecture is 5. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
@@ -62,9 +62,16 @@
#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
#endif
+#if CONFIG_PGTABLE_LEVELS > 4
+#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))
+#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3))
+#endif
+
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
- * (depending on the configuration, this level can be 0, 1 or 2).
+ * (depending on the configuration, this level can be -1, 0, 1 or 2).
*/
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
@@ -87,6 +94,15 @@
/*
* Hardware page table definitions.
*
+ * Level -1 descriptor (PGD).
+ */
+#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
+#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
+#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60)
+
+/*
* Level 0 descriptor (P4D).
*/
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
@@ -155,13 +171,17 @@
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
-#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
+#ifdef CONFIG_ARM64_64K_PAGES
#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
-#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#define PTE_ADDR_HIGH_SHIFT 36
+#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#else
-#define PTE_ADDR_MASK PTE_ADDR_LOW
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8)
+#define PTE_ADDR_HIGH_SHIFT 42
+#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8)
+#endif
#endif
/*
@@ -284,6 +304,7 @@
#define TCR_E0PD1 (UL(1) << 56)
#define TCR_TCMA0 (UL(1) << 57)
#define TCR_TCMA1 (UL(1) << 58)
+#define TCR_DS (UL(1) << 59)
/*
* TTBR.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 483dbfa39c4c..dd9ee67d1d87 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -30,8 +30,8 @@
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
-#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -57,10 +57,6 @@
#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
-#ifdef __ASSEMBLY__
-#define PTE_MAYBE_NG 0
-#endif
-
#ifndef __ASSEMBLY__
#include <asm/cpufeature.h>
@@ -71,7 +67,19 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+#ifndef CONFIG_ARM64_LPA2
#define lpa2_is_enabled() false
+#define PTE_MAYBE_SHARED PTE_SHARED
+#define PMD_MAYBE_SHARED PMD_SECT_S
+#else
+static inline bool __pure lpa2_is_enabled(void)
+{
+ return read_tcr() & TCR_DS;
+}
+
+#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
+#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#endif
/*
* If we have userspace only BTI we don't want to mark kernel pages
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index b8f158ae2527..6d6d4065b0cb 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -36,6 +36,12 @@ typedef struct { pudval_t pud; } pud_t;
#define __pud(x) ((pud_t) { (x) } )
#endif
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) } )
+#endif
+
typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 522c21348ae8..8bec85350865 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -80,15 +80,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
+ pte_val(pte) &= ~PTE_MAYBE_SHARED;
return (pte_val(pte) & PTE_ADDR_LOW) |
((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
- return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK;
+ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
-#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
+#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
#define __phys_to_pte_val(phys) (phys)
#endif
@@ -620,12 +621,12 @@ static inline bool pud_table(pud_t pud) { return true; }
PUD_TYPE_TABLE)
#endif
-extern pgd_t init_pg_dir[PTRS_PER_PGD];
+extern pgd_t init_pg_dir[];
extern pgd_t init_pg_end[];
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
-extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
+extern pgd_t swapper_pg_dir[];
+extern pgd_t idmap_pg_dir[];
+extern pgd_t tramp_pg_dir[];
+extern pgd_t reserved_pg_dir[];
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@@ -698,14 +699,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_user(pud) pte_user(pud_pte(pud))
#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
+static inline bool pgtable_l4_enabled(void);
+
static inline void set_pud(pud_t *pudp, pud_t pud)
{
-#ifdef __PAGETABLE_PUD_FOLDED
- if (in_swapper_pgdir(pudp)) {
+ if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
return;
}
-#endif /* __PAGETABLE_PUD_FOLDED */
WRITE_ONCE(*pudp, pud);
@@ -758,12 +759,27 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#if CONFIG_PGTABLE_LEVELS > 3
+static __always_inline bool pgtable_l4_enabled(void)
+{
+ if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
+ return true;
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_pud_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l4_enabled();
+}
+#define mm_pud_folded mm_pud_folded
+
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
-#define p4d_none(p4d) (!p4d_val(p4d))
-#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
-#define p4d_present(p4d) (p4d_val(p4d))
+#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & 2))
+#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
@@ -779,7 +795,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline void p4d_clear(p4d_t *p4dp)
{
- set_p4d(p4dp, __p4d(0));
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(0));
}
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
@@ -787,27 +804,75 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
}
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
+{
+ return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
+}
+
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
return (pud_t *)__va(p4d_page_paddr(p4d));
}
-/* Find an entry in the first-level page table. */
-#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
+static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l4_enabled());
-#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
-#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
-#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
+ return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
+}
-#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
+static inline
+pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
+}
+#define pud_offset_lockless pud_offset_lockless
+
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
+{
+ return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
+}
+#define pud_offset pud_offset
+
+static inline pud_t *pud_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return NULL;
+ return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
+}
+
+static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return pud_set_fixmap(pud_offset_phys(p4dp, addr));
+}
+
+static inline void pud_clear_fixmap(void)
+{
+ if (pgtable_l4_enabled())
+ clear_fixmap(FIX_PUD);
+}
/* use ONLY for statically allocated translation tables */
-#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
+}
+
+#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
#else
+static inline bool pgtable_l4_enabled(void) { return false; }
+
#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
-#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr) NULL
@@ -818,6 +883,122 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static __always_inline bool pgtable_l5_enabled(void)
+{
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_p4d_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l5_enabled();
+}
+#define mm_p4d_folded mm_p4d_folded
+
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
+
+#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
+#define pgd_present(pgd) (!pgd_none(pgd))
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (in_swapper_pgdir(pgdp)) {
+ set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
+ return;
+ }
+
+ WRITE_ONCE(*pgdp, pgd);
+ dsb(ishst);
+ isb();
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(0));
+}
+
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
+{
+ return __pgd_to_phys(pgd);
+}
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
+{
+ return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
+}
+
+static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l5_enabled());
+
+ return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
+}
+
+static inline
+p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
+{
+ return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
+}
+
+static inline p4d_t *p4d_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return NULL;
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
+}
+
+static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
+}
+
+static inline void p4d_clear_fixmap(void)
+{
+ if (pgtable_l5_enabled())
+ clear_fixmap(FIX_P4D);
+}
+
+/* use ONLY for statically allocated translation tables */
+static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
+}
+
+#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+
+#else
+
+static inline bool pgtable_l5_enabled(void) { return false; }
+
+/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
+#define p4d_set_fixmap(addr) NULL
+#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
+#define p4d_clear_fixmap()
+
+#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index 3fdae5fe3142..2e010ea76be2 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -33,37 +33,11 @@
#include <asm/cpufeature.h>
#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
-static inline bool should_patch_pac_into_scs(void)
-{
- u64 reg;
-
- /*
- * We only enable the shadow call stack dynamically if we are running
- * on a system that does not implement PAC or BTI. PAC and SCS provide
- * roughly the same level of protection, and BTI relies on the PACIASP
- * instructions serving as landing pads, preventing us from patching
- * those instructions into something else.
- */
- reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
- if (SYS_FIELD_GET(ID_AA64ISAR1_EL1, APA, reg) |
- SYS_FIELD_GET(ID_AA64ISAR1_EL1, API, reg))
- return false;
-
- reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
- if (SYS_FIELD_GET(ID_AA64ISAR2_EL1, APA3, reg))
- return false;
-
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
- reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
- if (reg & (0xf << ID_AA64PFR1_EL1_BT_SHIFT))
- return false;
- }
- return true;
-}
-
static inline void dynamic_scs_init(void)
{
- if (should_patch_pac_into_scs()) {
+ extern bool __pi_dynamic_scs_is_enabled;
+
+ if (__pi_dynamic_scs_is_enabled) {
pr_info("Enabling dynamic shadow call stack\n");
static_branch_enable(&dynamic_scs_enabled);
}
@@ -72,8 +46,8 @@ static inline void dynamic_scs_init(void)
static inline void dynamic_scs_init(void) {}
#endif
-int scs_patch(const u8 eh_frame[], int size);
-asmlinkage void scs_patch_vmlinux(void);
+int __pi_scs_patch(const u8 eh_frame[], int size);
+asmlinkage void __pi_scs_patch_vmlinux(void);
#endif /* __ASSEMBLY __ */
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
index 2e4d7da74fb8..ba269a7a3201 100644
--- a/arch/arm64/include/asm/setup.h
+++ b/arch/arm64/include/asm/setup.h
@@ -7,9 +7,6 @@
#include <uapi/asm/setup.h>
-void *get_early_fdt_ptr(void);
-void early_fdt_map(u64 dt_phys);
-
/*
* These two variables are used in the head.S file.
*/
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 0150deb332af..a947c6e784ed 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -103,6 +103,9 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
{
struct ptdesc *ptdesc = virt_to_ptdesc(pudp);
+ if (!pgtable_l4_enabled())
+ return;
+
pagetable_pud_dtor(ptdesc);
tlb_remove_ptdesc(tlb, ptdesc);
}