diff options
Diffstat (limited to 'arch/arm64/include')
113 files changed, 4731 insertions, 2324 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 4b6d2d52053e..d2ff8f6c3231 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 +syscall-y += syscall_table_32.h +syscall-y += syscall_table_64.h + +# arm32 syscall table used by lib/compat_audit.c: +syscall-y += unistd_32.h +# same constants with prefixes, used by vdso, seccomp and sigreturn: +syscall-y += unistd_compat_32.h + generic-y += early_ioremap.h +generic-y += fprobe.h generic-y += mcs_spinlock.h +generic-y += mmzone.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += parport.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 6792a1f83f2a..a407f9cd549e 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -119,6 +119,18 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (acpi_cpu_get_madt_gicc(cpu) && + uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index d328f549b1a6..c8c77f9e36d6 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -230,7 +230,11 @@ alternative_has_cap_likely(const unsigned long cpucap) return false; asm goto( +#ifdef BUILD_VDSO + ALTERNATIVE("b %l[l_no]", "nop", %[cpucap]) +#else ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) +#endif : : [cpucap] "i" (cpucap) : diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 5f172611654b..9e96f024b2f1 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -175,21 +175,6 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF | - GIC_PRIO_PSR_I_SET)); - BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); - /* - * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared - * and non-secure PMR accesses are not subject to the shifts that - * are applied to IRQ priorities - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); - /* - * Same situation as above, but now we make sure that we can mask - * regular interrupts. - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS | - GIC_PRIO_PSR_I_SET)); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 934c658ee947..f5794d50f51d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -15,7 +15,7 @@ #include <linux/bug.h> #include <linux/init.h> #include <linux/jump_label.h> -#include <linux/smp.h> +#include <linux/percpu.h> #include <linux/types.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index ecdb3cfcd0f8..8babfbe31f95 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -129,6 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void) return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } -u64 kaslr_early_init(void *fdt); - #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index c27404fa4418..8a777dec8d88 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -6,7 +6,7 @@ #ifndef __ASM_PMUV3_H #define __ASM_PMUV3_H -#include <linux/kvm_host.h> +#include <asm/kvm_host.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> @@ -33,6 +33,14 @@ static inline void write_pmevtypern(int n, unsigned long val) PMEVN_SWITCH(n, WRITE_PMEVTYPERN); } +#define RETURN_READ_PMEVTYPERN(n) \ + return read_sysreg(pmevtyper##n##_el0) +static inline unsigned long read_pmevtypern(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVTYPERN); + return 0; +} + static inline unsigned long read_pmmir(void) { return read_cpuid(PMMIR_EL1); @@ -46,6 +54,14 @@ static inline u32 read_pmuver(void) ID_AA64DFR0_EL1_PMUVer_SHIFT); } +static inline bool pmuv3_has_icntr(void) +{ + u64 dfr1 = read_sysreg(id_aa64dfr1_el1); + + return !!cpuid_feature_extract_unsigned_field(dfr1, + ID_AA64DFR1_EL1_PMICNTR_SHIFT); +} + static inline void write_pmcr(u64 val) { write_sysreg(val, pmcr_el0); @@ -71,22 +87,32 @@ static inline u64 read_pmccntr(void) return read_sysreg(pmccntr_el0); } -static inline void write_pmcntenset(u32 val) +static inline void write_pmicntr(u64 val) +{ + write_sysreg_s(val, SYS_PMICNTR_EL0); +} + +static inline u64 read_pmicntr(void) +{ + return read_sysreg_s(SYS_PMICNTR_EL0); +} + +static inline void write_pmcntenset(u64 val) { write_sysreg(val, pmcntenset_el0); } -static inline void write_pmcntenclr(u32 val) +static inline void write_pmcntenclr(u64 val) { write_sysreg(val, pmcntenclr_el0); } -static inline void write_pmintenset(u32 val) +static inline void write_pmintenset(u64 val) { write_sysreg(val, pmintenset_el1); } -static inline void write_pmintenclr(u32 val) +static inline void write_pmintenclr(u64 val) { write_sysreg(val, pmintenclr_el1); } @@ -96,12 +122,27 @@ static inline void write_pmccfiltr(u64 val) write_sysreg(val, pmccfiltr_el0); } -static inline void write_pmovsclr(u32 val) +static inline u64 read_pmccfiltr(void) +{ + return read_sysreg(pmccfiltr_el0); +} + +static inline void write_pmicfiltr(u64 val) +{ + write_sysreg_s(val, SYS_PMICFILTR_EL0); +} + +static inline u64 read_pmicfiltr(void) +{ + return read_sysreg_s(SYS_PMICFILTR_EL0); +} + +static inline void write_pmovsclr(u64 val) { write_sysreg(val, pmovsclr_el0); } -static inline u32 read_pmovsclr(void) +static inline u64 read_pmovsclr(void) { return read_sysreg(pmovsclr_el0); } @@ -111,6 +152,11 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, pmuserenr_el0); } +static inline void write_pmuacr(u64 val) +{ + write_sysreg_s(val, SYS_PMUACR_EL1); +} + static inline u64 read_pmceid0(void) { return read_sysreg(pmceid0_el0); @@ -137,4 +183,9 @@ static inline bool is_pmuv3p5(int pmuver) return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; } +static inline bool is_pmuv3p9(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9; +} + #endif diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index c762038ba400..6e73809f6492 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -28,6 +28,7 @@ 14470: .long 14471f - .; \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ .short flags; \ + .align 2; \ .popsection; \ 14471: #else diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 980d1dd8e1a3..b8a5861dc7b7 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -112,6 +112,9 @@ #define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) +#define _ASM_EXTABLE_KACCESS(insn, fixup) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ __ASM_EXTABLE_RAW(#insn, #fixup, \ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 513787e43329..ad63457a05c5 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -38,10 +38,6 @@ msr daifset, #0xf .endm - .macro enable_daif - msr daifclr, #0xf - .endm - /* * Save/restore interrupts. */ @@ -54,16 +50,12 @@ msr daif, \flags .endm - .macro enable_dbg - msr daifclr, #8 - .endm - .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 bic \tmp, \tmp, #DBG_MDSCR_SS msr mdscr_el1, \tmp - isb // Synchronise with enable_dbg + isb // Take effect before a subsequent clear of DAIF.D 9990: .endm @@ -257,13 +249,6 @@ alternative_endif .endm /* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* * read_ctr - read CTR_EL0. If the system has mismatched register fields, * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ @@ -346,20 +331,6 @@ alternative_cb_end .endm /* - * idmap_get_t0sz - get the T0SZ value needed to cover the ID map - * - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of _end. - */ - .macro idmap_get_t0sz, reg - adrp \reg, _end - orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 - clz \reg, \reg - .endm - -/* * tcr_compute_pa_size - set TCR.(I)PS to the highest supported * ID_AA64MMFR0_EL1.PARange value * @@ -372,6 +343,11 @@ alternative_cb_end // Narrow PARange to fit the PS field in TCR_ELx ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3 mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX +#ifdef CONFIG_ARM64_LPA2 +alternative_if_not ARM64_HAS_VA52 + mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_48 +alternative_else_nop_endif +#endif cmp \tmp0, \tmp1 csel \tmp0, \tmp1, \tmp0, hi bfi \tcr, \tmp0, \pos, #3 @@ -498,9 +474,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm @@ -590,18 +567,27 @@ alternative_endif .endm /* - * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD. + * If the kernel is built for 52-bit virtual addressing but the hardware only + * supports 48 bits, we cannot program the pgdir address into TTBR1 directly, + * but we have to add an offset so that the TTBR1 address corresponds with the + * pgdir entry that covers the lowest 48-bit addressable VA. + * + * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an + * additional paging level, and on LPA2/16k pages, we would end up with a root + * level table with only 2 entries, which is suboptimal in terms of TLB + * utilization, so there we fall back to 47 bits of translation if LPA2 is not + * supported. + * * orr is used as it can cover the immediate value (and is idempotent). - * In future this may be nop'ed out when dealing with 52-bit kernel VAs. * ttbr: Value of ttbr to set, modified. */ .macro offset_ttbr1, ttbr, tmp -#ifdef CONFIG_ARM64_VA_BITS_52 - mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 - and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) - cbnz \tmp, .Lskipoffs_\@ - orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET -.Lskipoffs_\@ : +#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2) + mrs \tmp, tcr_el1 + and \tmp, \tmp, #TCR_T1SZ_MASK + cmp \tmp, #TCR_T1SZ(VA_BITS_MIN) + orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET + csel \ttbr, \tmp, \ttbr, eq #endif .endm @@ -623,25 +609,13 @@ alternative_endif .macro phys_to_pte, pte, phys #ifdef CONFIG_ARM64_PA_BITS_52 - /* - * We assume \phys is 64K aligned and this is guaranteed by only - * supporting this configuration with 64K pages. - */ - orr \pte, \phys, \phys, lsr #36 - and \pte, \pte, #PTE_ADDR_MASK + orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT + and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK #else mov \pte, \phys #endif .endm - .macro pte_to_phys, phys, pte - and \phys, \pte, #PTE_ADDR_MASK -#ifdef CONFIG_ARM64_PA_BITS_52 - orr \phys, \phys, \phys, lsl #PTE_ADDR_HIGH_SHIFT - and \phys, \phys, GENMASK_ULL(PHYS_MASK_SHIFT - 1, PAGE_SHIFT) -#endif - .endm - /* * tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU. */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index cf2987464c18..1ca947d5c939 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -40,6 +40,10 @@ */ #define dgh() asm volatile("hint #6" : : : "memory") +#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ + SB_BARRIER_INSN"nop\n", \ + ARM64_HAS_SB)) + #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index 1abdcd508a11..beb42c62b6ac 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -11,6 +11,7 @@ * 0x004: for installing kprobes * 0x005: for installing uprobes * 0x006: for kprobe software single-step + * 0x007: for kretprobe return * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -23,6 +24,7 @@ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 #define KPROBES_BRK_SS_IMM 0x006 +#define KRETPROBES_BRK_IMM 0x007 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index fefac75fa009..28ab96e808ef 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -117,7 +117,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, * flush_dcache_folio is used when the kernel has written to the page * cache page at virtual address page->virtual. * - * If this page isn't mapped (ie, page_mapping == NULL), or it might + * If this page isn't mapped (ie, folio_mapping == NULL), or it might * have userspace mappings, then we _must_ always clean + invalidate * the dcache entries associated with the kernel mapping. * diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b1e43f56ee46..81e4157f92b7 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -46,20 +46,25 @@ struct cpuinfo_arm64 { u64 reg_revidr; u64 reg_gmid; u64 reg_smidr; + u64 reg_mpamidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; u64 reg_id_aa64isar2; + u64 reg_id_aa64isar3; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; u64 reg_id_aa64mmfr3; + u64 reg_id_aa64mmfr4; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; + u64 reg_id_aa64pfr2; u64 reg_id_aa64zfr0; u64 reg_id_aa64smfr0; + u64 reg_id_aa64fpfr0; struct cpuinfo_32bit aarch32; }; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 270680e2b5c4..0b5ca6e0eb09 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -42,6 +42,12 @@ cpucap_is_possible(const unsigned int cap) return IS_ENABLED(CONFIG_ARM64_BTI); case ARM64_HAS_TLB_RANGE: return IS_ENABLED(CONFIG_ARM64_TLB_RANGE); + case ARM64_HAS_S1POE: + return IS_ENABLED(CONFIG_ARM64_POE); + case ARM64_HAS_GCS: + return IS_ENABLED(CONFIG_ARM64_GCS); + case ARM64_HAFT: + return IS_ENABLED(CONFIG_ARM64_HAFT); case ARM64_UNMAP_KERNEL_AT_EL0: return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); case ARM64_WORKAROUND_843419: @@ -58,6 +64,13 @@ cpucap_is_possible(const unsigned int cap) return IS_ENABLED(CONFIG_NVIDIA_CARMEL_CNP_ERRATUM); case ARM64_WORKAROUND_REPEAT_TLBI: return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI); + case ARM64_WORKAROUND_SPECULATIVE_SSBS: + return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386); + case ARM64_MPAM: + /* + * KVM MPAM support doesn't rely on the host kernel supporting MPAM. + */ + return true; } return true; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index bd8d4ca81a48..e0e4478f5fb5 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -12,11 +12,12 @@ #include <asm/hwcap.h> #include <asm/sysreg.h> -#define MAX_CPU_FEATURES 128 +#define MAX_CPU_FEATURES 192 #define cpu_feature(x) KERNEL_HWCAP_ ## x #define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0 #define ARM64_SW_FEATURE_OVERRIDE_HVHE 4 +#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8 #ifndef __ASSEMBLY__ @@ -363,6 +364,7 @@ struct arm64_cpu_capabilities { u8 field_pos; u8 field_width; u8 min_field_value; + u8 max_field_value; u8 hwcap_type; bool sign; unsigned long hwcap; @@ -436,6 +438,7 @@ void cpu_set_feature(unsigned int num); bool cpu_have_feature(unsigned int num); unsigned long cpu_get_elf_hwcap(void); unsigned long cpu_get_elf_hwcap2(void); +unsigned long cpu_get_elf_hwcap3(void); #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) @@ -586,14 +589,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT); - return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_EL1_AARCH32; } static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT); - return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_EL0_AARCH32; } static inline bool id_aa64pfr0_sve(u64 pfr0) @@ -610,6 +613,13 @@ static inline bool id_aa64pfr1_sme(u64 pfr1) return val > 0; } +static inline bool id_aa64pfr0_mpam(u64 pfr0) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT); + + return val > 0; +} + static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT); @@ -661,6 +671,7 @@ static inline bool supports_clearbhb(int scope) } const struct cpumask *system_32bit_el0_cpumask(void); +const struct cpumask *fallback_32bit_el0_cpumask(void); DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); static inline bool system_supports_32bit_el0(void) @@ -768,6 +779,11 @@ static __always_inline bool system_supports_tpidr2(void) return system_supports_sme(); } +static __always_inline bool system_supports_fpmr(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_FPMR); +} + static __always_inline bool system_supports_cnp(void) { return alternative_has_cap_unlikely(ARM64_HAS_CNP); @@ -825,6 +841,31 @@ static inline bool system_supports_lpa2(void) return cpus_have_final_cap(ARM64_HAS_LPA2); } +static inline bool system_supports_poe(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_S1POE); +} + +static inline bool system_supports_gcs(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_GCS); +} + +static inline bool system_supports_haft(void) +{ + return cpus_have_final_cap(ARM64_HAFT); +} + +static __always_inline bool system_supports_mpam(void) +{ + return alternative_has_cap_unlikely(ARM64_MPAM); +} + +static __always_inline bool system_supports_mpam_hcr(void) +{ + return alternative_has_cap_unlikely(ARM64_MPAM_HCR); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); @@ -905,7 +946,9 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur); struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id); +extern struct arm64_ftr_override id_aa64mmfr0_override; extern struct arm64_ftr_override id_aa64mmfr1_override; +extern struct arm64_ftr_override id_aa64mmfr2_override; extern struct arm64_ftr_override id_aa64pfr0_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64zfr0_override; @@ -915,9 +958,114 @@ extern struct arm64_ftr_override id_aa64isar2_override; extern struct arm64_ftr_override arm64_sw_feature_override; +static inline +u64 arm64_apply_feature_override(u64 val, int feat, int width, + const struct arm64_ftr_override *override) +{ + u64 oval = override->val; + + /* + * When it encounters an invalid override (e.g., an override that + * cannot be honoured due to a missing CPU feature), the early idreg + * override code will set the mask to 0x0 and the value to non-zero for + * the field in question. In order to determine whether the override is + * valid or not for the field we are interested in, we first need to + * disregard bits belonging to other fields. + */ + oval &= GENMASK_ULL(feat + width - 1, feat); + + /* + * The override is valid if all value bits are accounted for in the + * mask. If so, replace the masked bits with the override value. + */ + if (oval == (oval & override->mask)) { + val &= ~override->mask; + val |= oval; + } + + /* Extract the field from the updated value */ + return cpuid_feature_extract_unsigned_field(val, feat); +} + +static inline bool arm64_test_sw_feature_override(int feat) +{ + /* + * Software features are pseudo CPU features that have no underlying + * CPUID system register value to apply the override to. + */ + return arm64_apply_feature_override(0, feat, 4, + &arm64_sw_feature_override); +} + +static inline bool kaslr_disabled_cmdline(void) +{ + return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR); +} + u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); +static inline bool cpu_has_bti(void) +{ + if (!IS_ENABLED(CONFIG_ARM64_BTI)) + return false; + + return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1), + ID_AA64PFR1_EL1_BT_SHIFT, 4, + &id_aa64pfr1_override); +} + +static inline bool cpu_has_pac(void) +{ + u64 isar1, isar2; + + if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) + return false; + + isar1 = read_cpuid(ID_AA64ISAR1_EL1); + isar2 = read_cpuid(ID_AA64ISAR2_EL1); + + if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4, + &id_aa64isar1_override)) + return true; + + if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4, + &id_aa64isar1_override)) + return true; + + return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, + &id_aa64isar2_override); +} + +static inline bool cpu_has_lva(void) +{ + u64 mmfr2; + + mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + mmfr2 &= ~id_aa64mmfr2_override.mask; + mmfr2 |= id_aa64mmfr2_override.val; + return cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_EL1_VARange_SHIFT); +} + +static inline bool cpu_has_lpa2(void) +{ +#ifdef CONFIG_ARM64_LPA2 + u64 mmfr0; + int feat; + + mmfr0 = read_sysreg(id_aa64mmfr0_el1); + mmfr0 &= ~id_aa64mmfr0_override.mask; + mmfr0 |= id_aa64mmfr0_override.val; + feat = cpuid_feature_extract_signed_field(mmfr0, + ID_AA64MMFR0_EL1_TGRAN_SHIFT); + + return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2; +#else + return false; +#endif +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 52f076afeb96..6f3f4142e214 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,15 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E +#define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 +#define ARM_CPU_PART_CORTEX_X4 0xD82 +#define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -113,6 +122,7 @@ #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 +#define QCOM_CPU_PART_ORYON_X1 0x001 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -135,6 +145,7 @@ #define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ @@ -159,6 +170,15 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) +#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) +#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) +#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -179,6 +199,7 @@ #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) @@ -196,6 +217,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) #define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/arch/arm64/include/asm/crash_core.h b/arch/arm64/include/asm/crash_reserve.h index 9f5c8d339f44..4afe027a4e7b 100644 --- a/arch/arm64/include/asm/crash_core.h +++ b/arch/arm64/include/asm/crash_reserve.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ARM64_CRASH_CORE_H -#define _ARM64_CRASH_CORE_H +#ifndef _ARM64_CRASH_RESERVE_H +#define _ARM64_CRASH_RESERVE_H /* Current arm64 boot protocol requires 2MB alignment */ #define CRASH_ALIGN SZ_2M diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 55f57dfa8e2f..fbb5c99eb2f9 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -132,7 +132,7 @@ static inline void local_daif_inherit(struct pt_regs *regs) trace_hardirqs_on(); if (system_uses_irq_prio_masking()) - gic_write_pmr(regs->pmr_save); + gic_write_pmr(regs->pmr); /* * We can't use local_daif_restore(regs->pstate) here as diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 13d437bcbf58..8f6ba31b8658 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -105,6 +105,7 @@ void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); int kernel_active_single_step(void); void kernel_rewind_single_step(struct pt_regs *regs); +void kernel_fastforward_single_step(struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT int reinstall_suspended_bps(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b7afaa026842..555c613fd232 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -16,6 +16,32 @@ #include <asm/sysreg.h> #include <linux/irqchip/arm-gic-v3.h> +.macro init_el2_hcr val + mov_q x0, \val + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it + * can reset into an UNKNOWN state and might not read as 1 until it has + * been initialized explicitly. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + * + * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * indicating whether the CPU is running in E2H mode. + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH + cmp x1, #0 + b.ge .LnVHE_\@ + + orr x0, x0, #HCR_E2H +.LnVHE_\@: + msr hcr_el2, x0 + isb +.endm + .macro __init_el2_sctlr mov_q x0, INIT_SCTLR_EL2_MMU_OFF msr sctlr_el2, x0 @@ -27,6 +53,14 @@ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ mov_q x0, HCRX_HOST_FLAGS + + /* Enable GCS if supported */ + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_GCSEn + +.Lset_hcrx_\@: msr_s SYS_HCRX_EL2, x0 .Lskip_hcrx_\@: .endm @@ -59,13 +93,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 @@ -78,7 +113,7 @@ 1 << PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter .Lskip_spe_el2_\@: - mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + mov x0, #MDCR_EL2_E2PB_MASK orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. @@ -91,7 +126,7 @@ and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 - mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + mov x0, #MDCR_EL2_E2TB_MASK orr x2, x2, x0 // allow the EL1&0 translation // to own it. @@ -145,7 +180,7 @@ /* Coprocessor traps */ .macro __init_el2_cptr __check_hvhe .LnVHE_\@, x1 - mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN) + mov x0, #CPACR_EL1_FPEN msr cpacr_el1, x0 b .Lskip_set_cptr_\@ .LnVHE_\@: @@ -164,32 +199,50 @@ mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cmp x1, #3 - b.lt .Lset_debug_fgt_\@ + b.lt .Lskip_spe_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ orr x0, x0, #(1 << 62) -.Lset_debug_fgt_\@: +.Lskip_spe_fgt_\@: msr_s SYS_HDFGRTR_EL2, x0 msr_s SYS_HDFGWTR_EL2, x0 mov x0, xzr mrs x1, id_aa64pfr1_el1 ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4 - cbz x1, .Lset_pie_fgt_\@ + cbz x1, .Lskip_debug_fgt_\@ /* Disable nVHE traps of TPIDR2 and SMPRI */ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK -.Lset_pie_fgt_\@: +.Lskip_debug_fgt_\@: mrs_s x1, SYS_ID_AA64MMFR3_EL1 ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 - cbz x1, .Lset_fgt_\@ + cbz x1, .Lskip_pie_fgt_\@ /* Disable trapping of PIR_EL1 / PIRE0_EL1 */ orr x0, x0, #HFGxTR_EL2_nPIR_EL1 orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1 +.Lskip_pie_fgt_\@: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4 + cbz x1, .Lskip_poe_fgt_\@ + + /* Disable trapping of POR_EL0 */ + orr x0, x0, #HFGxTR_EL2_nPOR_EL0 + +.Lskip_poe_fgt_\@: + /* GCS depends on PIE so we don't check it if PIE is absent */ + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable traps of access to GCS registers at EL0 and EL1 */ + orr x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK + orr x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK + .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 @@ -197,15 +250,37 @@ mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 - cbz x1, .Lskip_fgt_\@ + cbz x1, .Lskip_amu_fgt_\@ msr_s SYS_HAFGRTR_EL2, xzr + +.Lskip_amu_fgt_\@: + .Lskip_fgt_\@: .endm -.macro __init_el2_nvhe_prepare_eret - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 +.macro __init_el2_gcs + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lskip_gcs_\@ + + /* Ensure GCS is not enabled when we start trying to do BLs */ + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr +.Lskip_gcs_\@: +.endm + +.macro __init_el2_mpam + /* Memory Partitioning And Monitoring: disable EL2 traps */ + mrs x1, id_aa64pfr0_el1 + ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4 + cbz x0, .Lskip_mpam_\@ // skip if no MPAM + msr_s SYS_MPAM2_EL2, xzr // use the default partition + // and disable lower traps + mrs_s x0, SYS_MPAMIDR_EL1 + tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg + msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 +.Lskip_mpam_\@: .endm /** @@ -225,9 +300,11 @@ __init_el2_stage2 __init_el2_gicv3 __init_el2_hstr + __init_el2_mpam __init_el2_nvhe_idregs __init_el2_cptr __init_el2_fgt + __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -276,7 +353,7 @@ // (h)VHE case mrs x0, cpacr_el1 // Disable SVE traps - orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) + orr x0, x0, #CPACR_EL1_ZEN msr cpacr_el1, x0 b .Lskip_set_cptr_\@ @@ -297,7 +374,7 @@ // (h)VHE case mrs x0, cpacr_el1 // Disable SME traps - orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN) + orr x0, x0, #CPACR_EL1_SMEN msr cpacr_el1, x0 b .Lskip_set_cptr_sme_\@ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 97932fbf973d..3f93f4eef953 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -201,16 +201,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define COMPAT_ELF_PLATFORM ("v8l") #endif -#ifdef CONFIG_COMPAT - -/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ -#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL - /* AArch32 registers. */ #define COMPAT_ELF_NGREG 18 typedef unsigned int compat_elf_greg_t; typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; +#ifdef CONFIG_COMPAT + +/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ +#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL + /* AArch32 EABI. */ #define EF_ARM_EABI_MASK 0xff000000 int compat_elf_check_arch(const struct elf32_hdr *); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 353fe08546cf..d1b1a33f9a8b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -10,63 +10,64 @@ #include <asm/memory.h> #include <asm/sysreg.h> -#define ESR_ELx_EC_UNKNOWN (0x00) -#define ESR_ELx_EC_WFx (0x01) +#define ESR_ELx_EC_UNKNOWN UL(0x00) +#define ESR_ELx_EC_WFx UL(0x01) /* Unallocated EC: 0x02 */ -#define ESR_ELx_EC_CP15_32 (0x03) -#define ESR_ELx_EC_CP15_64 (0x04) -#define ESR_ELx_EC_CP14_MR (0x05) -#define ESR_ELx_EC_CP14_LS (0x06) -#define ESR_ELx_EC_FP_ASIMD (0x07) -#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ -#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +#define ESR_ELx_EC_CP15_32 UL(0x03) +#define ESR_ELx_EC_CP15_64 UL(0x04) +#define ESR_ELx_EC_CP14_MR UL(0x05) +#define ESR_ELx_EC_CP14_LS UL(0x06) +#define ESR_ELx_EC_FP_ASIMD UL(0x07) +#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */ /* Unallocated EC: 0x0A - 0x0B */ -#define ESR_ELx_EC_CP14_64 (0x0C) -#define ESR_ELx_EC_BTI (0x0D) -#define ESR_ELx_EC_ILL (0x0E) +#define ESR_ELx_EC_CP14_64 UL(0x0C) +#define ESR_ELx_EC_BTI UL(0x0D) +#define ESR_ELx_EC_ILL UL(0x0E) /* Unallocated EC: 0x0F - 0x10 */ -#define ESR_ELx_EC_SVC32 (0x11) -#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ -#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +#define ESR_ELx_EC_SVC32 UL(0x11) +#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */ /* Unallocated EC: 0x14 */ -#define ESR_ELx_EC_SVC64 (0x15) -#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ -#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ -#define ESR_ELx_EC_SYS64 (0x18) -#define ESR_ELx_EC_SVE (0x19) -#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +#define ESR_ELx_EC_SVC64 UL(0x15) +#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 UL(0x18) +#define ESR_ELx_EC_SVE UL(0x19) +#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ -#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -#define ESR_ELx_EC_SME (0x1D) +#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */ +#define ESR_ELx_EC_SME UL(0x1D) /* Unallocated EC: 0x1E */ -#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ -#define ESR_ELx_EC_IABT_LOW (0x20) -#define ESR_ELx_EC_IABT_CUR (0x21) -#define ESR_ELx_EC_PC_ALIGN (0x22) +#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW UL(0x20) +#define ESR_ELx_EC_IABT_CUR UL(0x21) +#define ESR_ELx_EC_PC_ALIGN UL(0x22) /* Unallocated EC: 0x23 */ -#define ESR_ELx_EC_DABT_LOW (0x24) -#define ESR_ELx_EC_DABT_CUR (0x25) -#define ESR_ELx_EC_SP_ALIGN (0x26) -#define ESR_ELx_EC_MOPS (0x27) -#define ESR_ELx_EC_FP_EXC32 (0x28) +#define ESR_ELx_EC_DABT_LOW UL(0x24) +#define ESR_ELx_EC_DABT_CUR UL(0x25) +#define ESR_ELx_EC_SP_ALIGN UL(0x26) +#define ESR_ELx_EC_MOPS UL(0x27) +#define ESR_ELx_EC_FP_EXC32 UL(0x28) /* Unallocated EC: 0x29 - 0x2B */ -#define ESR_ELx_EC_FP_EXC64 (0x2C) -/* Unallocated EC: 0x2D - 0x2E */ -#define ESR_ELx_EC_SERROR (0x2F) -#define ESR_ELx_EC_BREAKPT_LOW (0x30) -#define ESR_ELx_EC_BREAKPT_CUR (0x31) -#define ESR_ELx_EC_SOFTSTP_LOW (0x32) -#define ESR_ELx_EC_SOFTSTP_CUR (0x33) -#define ESR_ELx_EC_WATCHPT_LOW (0x34) -#define ESR_ELx_EC_WATCHPT_CUR (0x35) +#define ESR_ELx_EC_FP_EXC64 UL(0x2C) +#define ESR_ELx_EC_GCS UL(0x2D) +/* Unallocated EC: 0x2E */ +#define ESR_ELx_EC_SERROR UL(0x2F) +#define ESR_ELx_EC_BREAKPT_LOW UL(0x30) +#define ESR_ELx_EC_BREAKPT_CUR UL(0x31) +#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32) +#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33) +#define ESR_ELx_EC_WATCHPT_LOW UL(0x34) +#define ESR_ELx_EC_WATCHPT_CUR UL(0x35) /* Unallocated EC: 0x36 - 0x37 */ -#define ESR_ELx_EC_BKPT32 (0x38) +#define ESR_ELx_EC_BKPT32 UL(0x38) /* Unallocated EC: 0x39 */ -#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */ /* Unallocated EC: 0x3B */ -#define ESR_ELx_EC_BRK64 (0x3C) +#define ESR_ELx_EC_BRK64 UL(0x3C) /* Unallocated EC: 0x3D - 0x3F */ -#define ESR_ELx_EC_MAX (0x3F) +#define ESR_ELx_EC_MAX UL(0x3F) #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_WIDTH (6) @@ -117,15 +118,17 @@ #define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) -#define ESR_ELx_FSC_SEA_TTW0 (0x14) -#define ESR_ELx_FSC_SEA_TTW1 (0x15) -#define ESR_ELx_FSC_SEA_TTW2 (0x16) -#define ESR_ELx_FSC_SEA_TTW3 (0x17) +#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) -#define ESR_ELx_FSC_SECC_TTW0 (0x1c) -#define ESR_ELx_FSC_SECC_TTW1 (0x1d) -#define ESR_ELx_FSC_SECC_TTW2 (0x1e) -#define ESR_ELx_FSC_SECC_TTW3 (0x1f) +#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) + +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n)) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) /* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV_SHIFT (24) @@ -158,6 +161,8 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ +#define ESR_ELx_FSC_ADDRSZ (0x00) +#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -382,9 +387,39 @@ #define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) #define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) +/* ISS field definitions for GCS */ +#define ESR_ELx_ExType_SHIFT (20) +#define ESR_ELx_ExType_MASK GENMASK(23, 20) +#define ESR_ELx_Raddr_SHIFT (10) +#define ESR_ELx_Raddr_MASK GENMASK(14, 10) +#define ESR_ELx_Rn_SHIFT (5) +#define ESR_ELx_Rn_MASK GENMASK(9, 5) +#define ESR_ELx_Rvalue_SHIFT 5 +#define ESR_ELx_Rvalue_MASK GENMASK(9, 5) +#define ESR_ELx_IT_SHIFT (0) +#define ESR_ELx_IT_MASK GENMASK(4, 0) + +#define ESR_ELx_ExType_DATA_CHECK 0 +#define ESR_ELx_ExType_EXLOCK 1 +#define ESR_ELx_ExType_STR 2 + +#define ESR_ELx_IT_RET 0 +#define ESR_ELx_IT_GCSPOPM 1 +#define ESR_ELx_IT_RET_KEYA 2 +#define ESR_ELx_IT_RET_KEYB 3 +#define ESR_ELx_IT_GCSSS1 4 +#define ESR_ELx_IT_GCSSS2 5 +#define ESR_ELx_IT_GCSPOPCX 6 +#define ESR_ELx_IT_GCSPOPX 7 + #ifndef __ASSEMBLY__ #include <asm/types.h> +static inline unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + static inline bool esr_is_data_abort(unsigned long esr) { const unsigned long ec = ESR_ELx_EC(esr); @@ -392,19 +427,53 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + static inline bool esr_fsc_is_translation_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); } static inline bool esr_fsc_is_permission_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); } static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); +} + +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; } const char *esr_get_class_string(unsigned long esr); diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index ad688e157c9b..d48fc16584cd 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -57,6 +57,8 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr); void do_el1_undef(struct pt_regs *regs, unsigned long esr); void do_el0_bti(struct pt_regs *regs); void do_el1_bti(struct pt_regs *regs, unsigned long esr); +void do_el0_gcs(struct pt_regs *regs, unsigned long esr); +void do_el1_gcs(struct pt_regs *regs, unsigned long esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); @@ -73,8 +75,9 @@ void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); void do_el1_fpac(struct pt_regs *regs, unsigned long esr); void do_el0_mops(struct pt_regs *regs, unsigned long esr); +void do_el1_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); -void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); +void do_signal(struct pt_regs *regs); void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h deleted file mode 100644 index 1a495d8fb2ce..000000000000 --- a/arch/arm64/include/asm/fb.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ -#ifndef __ASM_FB_H_ -#define __ASM_FB_H_ - -#include <asm-generic/fb.h> - -#endif /* __ASM_FB_H_ */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 58c294a96676..87e307804b99 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -87,6 +87,7 @@ enum fixed_addresses { FIX_PTE, FIX_PMD, FIX_PUD, + FIX_P4D, FIX_PGD, __end_of_fixed_addresses @@ -100,7 +101,6 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) void __init early_fixmap_init(void); -void __init fixmap_copy(pgd_t *pgdir); #define __early_set_fixmap __set_fixmap diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b67b89c54e1c..f2a84efc3618 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/types.h> -#ifdef CONFIG_COMPAT /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f #define VFP_FPSCR_CTRL_MASK 0x07f79f00 @@ -30,7 +29,6 @@ * control/status register. */ #define VFP_STATE_SIZE ((32 * 8) + 4) -#endif static inline unsigned long cpacr_save_enable_kernel_sve(void) { @@ -89,6 +87,7 @@ struct cpu_fp_state { void *sve_state; void *sme_state; u64 *svcr; + u64 *fpmr; unsigned int sve_vl; unsigned int sme_vl; enum fp_type *fp_type; @@ -154,8 +153,7 @@ extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused); extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused); extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused); extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused); - -extern u64 read_smcr_features(void); +extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused); /* * Helpers to translate bit indices in sve_vq_map to VQ values (and diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h new file mode 100644 index 000000000000..2ae50bdce59b --- /dev/null +++ b/arch/arm64/include/asm/fpu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_FPU_H +#define __ASM_FPU_H + +#include <asm/neon.h> + +#define kernel_fpu_available() cpu_has_neon() +#define kernel_fpu_begin() kernel_neon_begin() +#define kernel_fpu_end() kernel_neon_end() + +#endif /* ! __ASM_FPU_H */ diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index ab158196480c..bfe3ce9df197 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -12,17 +12,6 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST -/* - * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a - * "return address pointer" which can be used to uniquely identify a return - * address which has been overwritten. - * - * On arm64 we use the address of the caller's frame record, which remains the - * same for the lifetime of the instrumented function, unlike the return - * address in the LR. - */ -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else @@ -63,10 +52,15 @@ extern unsigned long ftrace_graph_call; extern void return_to_handler(void); unsigned long ftrace_call_adjust(unsigned long addr); +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip); +#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define HAVE_ARCH_FTRACE_REGS struct dyn_ftrace; struct ftrace_ops; +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) #define arch_ftrace_get_regs(regs) NULL @@ -74,7 +68,7 @@ struct ftrace_ops; * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct * stack alignment */ -struct ftrace_regs { +struct __arch_ftrace_regs { /* x0 - x8 */ unsigned long regs[9]; @@ -94,49 +88,81 @@ struct ftrace_regs { static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->pc; + return arch_ftrace_regs(fregs)->pc; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long pc) { - fregs->pc = pc; + arch_ftrace_regs(fregs)->pc = pc; } static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) { - return fregs->sp; + return arch_ftrace_regs(fregs)->sp; } static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) { if (n < 8) - return fregs->regs[n]; + return arch_ftrace_regs(fregs)->regs[n]; return 0; } static __always_inline unsigned long ftrace_regs_get_return_value(const struct ftrace_regs *fregs) { - return fregs->regs[0]; + return arch_ftrace_regs(fregs)->regs[0]; } static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, unsigned long ret) { - fregs->regs[0] = ret; + arch_ftrace_regs(fregs)->regs[0] = ret; } static __always_inline void ftrace_override_function_with_return(struct ftrace_regs *fregs) { - fregs->pc = fregs->lr; + arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr; +} + +static __always_inline unsigned long +ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->fp; +} + +static __always_inline unsigned long +ftrace_regs_get_return_address(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->lr; +} + +static __always_inline struct pt_regs * +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) +{ + struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs); + + memcpy(regs->regs, afregs->regs, sizeof(afregs->regs)); + regs->sp = afregs->sp; + regs->pc = afregs->pc; + regs->regs[29] = afregs->fp; + regs->regs[30] = afregs->lr; + return regs; } +#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ + (_regs)->pc = arch_ftrace_regs(fregs)->pc; \ + (_regs)->regs[29] = arch_ftrace_regs(fregs)->fp; \ + (_regs)->sp = arch_ftrace_regs(fregs)->sp; \ + (_regs)->pstate = PSR_MODE_EL1h; \ + } while (0) + int ftrace_regs_query_register_offset(const char *name); int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); @@ -154,7 +180,7 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, * The ftrace trampoline will return to this address instead of the * instrumented function. */ - fregs->direct_tramp = addr; + arch_ftrace_regs(fregs)->direct_tramp = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ @@ -194,23 +220,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, #ifndef __ASSEMBLY__ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -struct fgraph_ret_regs { - /* x0 - x7 */ - unsigned long regs[8]; - - unsigned long fp; - unsigned long __unused; -}; - -static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) -{ - return ret_regs->regs[0]; -} - -static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) -{ - return ret_regs->fp; -} void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer); diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h new file mode 100644 index 000000000000..f50660603ecf --- /dev/null +++ b/arch/arm64/include/asm/gcs.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ +#ifndef __ASM_GCS_H +#define __ASM_GCS_H + +#include <asm/types.h> +#include <asm/uaccess.h> + +struct kernel_clone_args; +struct ksignal; + +static inline void gcsb_dsync(void) +{ + asm volatile(".inst 0xd503227f" : : : "memory"); +} + +static inline void gcsstr(u64 *addr, u64 val) +{ + register u64 *_addr __asm__ ("x0") = addr; + register long _val __asm__ ("x1") = val; + + /* GCSSTTR x1, x0 */ + asm volatile( + ".inst 0xd91f1c01\n" + : + : "rZ" (_val), "r" (_addr) + : "memory"); +} + +static inline void gcsss1(u64 Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static inline u64 gcsss2(void) +{ + u64 Xt; + + asm volatile( + "SYSL %0, #3, C7, C7, #3\n" + : "=r" (Xt) + : + : "memory"); + + return Xt; +} + +#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \ + (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH) + +#ifdef CONFIG_ARM64_GCS + +static inline bool task_gcs_el0_enabled(struct task_struct *task) +{ + return current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE; +} + +void gcs_set_el0_mode(struct task_struct *task); +void gcs_free(struct task_struct *task); +void gcs_preserve_current_state(void); +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args); + +static inline int gcs_check_locked(struct task_struct *task, + unsigned long new_val) +{ + unsigned long cur_val = task->thread.gcs_el0_mode; + + cur_val &= task->thread.gcs_el0_locked; + new_val &= task->thread.gcs_el0_locked; + + if (cur_val != new_val) + return -EBUSY; + + return 0; +} + +#else + +static inline bool task_gcs_el0_enabled(struct task_struct *task) +{ + return false; +} + +static inline void gcs_set_el0_mode(struct task_struct *task) { } +static inline void gcs_free(struct task_struct *task) { } +static inline void gcs_preserve_current_state(void) { } +static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args) +{ + return -ENOTSUPP; +} +static inline int gcs_check_locked(struct task_struct *task, + unsigned long new_val) +{ + return 0; +} + +#endif + +#endif diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 2ddc33d93b13..07fbf5bf85a7 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -11,6 +11,7 @@ #define __ASM_HUGETLB_H #include <asm/cacheflush.h> +#include <asm/mte.h> #include <asm/page.h> #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION @@ -18,11 +19,18 @@ extern bool arch_hugetlb_migration_supported(struct hstate *h); #endif -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); + +#ifdef CONFIG_ARM64_MTE + if (system_supports_mte()) { + clear_bit(PG_mte_tagged, &folio->flags); + clear_bit(PG_mte_lock, &folio->flags); + } +#endif } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte @@ -34,8 +42,8 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -46,7 +54,7 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(pte_t *ptep); +extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void __init arm64_hugetlb_cma_reserve(void); @@ -68,12 +76,22 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, { unsigned long stride = huge_page_size(hstate_vma(vma)); - if (stride == PMD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 2); - else if (stride == PUD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 1); - else - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + switch (stride) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1); + break; +#endif + case CONT_PMD_SIZE: + case PMD_SIZE: + __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2); + break; + case CONT_PTE_SIZE: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3); + break; + default: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); + } } #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 84055329cd8b..bd81cf17744a 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -59,7 +59,6 @@ static inline void decode_ctrl_reg(u32 reg, /* Watchpoints */ #define ARM_BREAKPOINT_LOAD 1 #define ARM_BREAKPOINT_STORE 2 -#define AARCH64_ESR_ACCESS_MASK (1 << 6) /* Lengths */ #define ARM_BREAKPOINT_LEN_1 0x1 diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index cd71e09ea14d..1c3f9617d54f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -92,6 +92,22 @@ #define KERNEL_HWCAP_SB __khwcap_feature(SB) #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) +#define KERNEL_HWCAP_GCS __khwcap_feature(GCS) +#define KERNEL_HWCAP_CMPBR __khwcap_feature(CMPBR) +#define KERNEL_HWCAP_FPRCVT __khwcap_feature(FPRCVT) +#define KERNEL_HWCAP_F8MM8 __khwcap_feature(F8MM8) +#define KERNEL_HWCAP_F8MM4 __khwcap_feature(F8MM4) +#define KERNEL_HWCAP_SVE_F16MM __khwcap_feature(SVE_F16MM) +#define KERNEL_HWCAP_SVE_ELTPERM __khwcap_feature(SVE_ELTPERM) +#define KERNEL_HWCAP_SVE_AES2 __khwcap_feature(SVE_AES2) +#define KERNEL_HWCAP_SVE_BFSCALE __khwcap_feature(SVE_BFSCALE) +#define KERNEL_HWCAP_SVE2P2 __khwcap_feature(SVE2P2) +#define KERNEL_HWCAP_SME2P2 __khwcap_feature(SME2P2) +#define KERNEL_HWCAP_SME_SBITPERM __khwcap_feature(SME_SBITPERM) +#define KERNEL_HWCAP_SME_AES __khwcap_feature(SME_AES) +#define KERNEL_HWCAP_SME_SFEXPA __khwcap_feature(SME_SFEXPA) +#define KERNEL_HWCAP_SME_STMOP __khwcap_feature(SME_STMOP) +#define KERNEL_HWCAP_SME_SMOP4 __khwcap_feature(SME_SMOP4) #define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) @@ -142,6 +158,24 @@ #define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16) #define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3) #define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128) +#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR) +#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT) +#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX) +#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT) +#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA) +#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4) +#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2) +#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3) +#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2) +#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2) +#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16) +#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32) +#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA) +#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4) +#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2) +#define KERNEL_HWCAP_POE __khwcap2_feature(POE) + +#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128) /* * This yields a mask that user programs can use to figure out what @@ -149,11 +183,13 @@ */ #define ELF_HWCAP cpu_get_elf_hwcap() #define ELF_HWCAP2 cpu_get_elf_hwcap2() +#define ELF_HWCAP3 cpu_get_elf_hwcap3() #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) #define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) -extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; +#define COMPAT_ELF_HWCAP3 (compat_elf_hwcap3) +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3; #endif enum { diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h deleted file mode 100644 index bc6c7ac934a1..000000000000 --- a/arch/arm64/include/asm/hyperv-tlfs.h +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * This file contains definitions from the Hyper-V Hypervisor Top-Level - * Functional Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - * - * Copyright (C) 2021, Microsoft, Inc. - * - * Author : Michael Kelley <mikelley@microsoft.com> - */ - -#ifndef _ASM_HYPERV_TLFS_H -#define _ASM_HYPERV_TLFS_H - -#include <linux/types.h> - -/* - * All data structures defined in the TLFS that are shared between Hyper-V - * and a guest VM use Little Endian byte ordering. This matches the default - * byte ordering of Linux running on ARM64, so no special handling is required. - */ - -/* - * These Hyper-V registers provide information equivalent to the CPUID - * instruction on x86/x64. - */ -#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ -#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ -#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ - -/* - * Group C Features. See the asm-generic version of hyperv-tlfs.h - * for a description of Feature Groups. - */ - -/* Crash MSRs available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) - -/* STIMER direct mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) - -/* - * Synthetic register definitions equivalent to MSRs on x86/x64 - */ -#define HV_REGISTER_CRASH_P0 0x00000210 -#define HV_REGISTER_CRASH_P1 0x00000211 -#define HV_REGISTER_CRASH_P2 0x00000212 -#define HV_REGISTER_CRASH_P3 0x00000213 -#define HV_REGISTER_CRASH_P4 0x00000214 -#define HV_REGISTER_CRASH_CTL 0x00000215 - -#define HV_REGISTER_GUEST_OSID 0x00090002 -#define HV_REGISTER_VP_INDEX 0x00090003 -#define HV_REGISTER_TIME_REF_COUNT 0x00090004 -#define HV_REGISTER_REFERENCE_TSC 0x00090017 - -#define HV_REGISTER_SINT0 0x000A0000 -#define HV_REGISTER_SCONTROL 0x000A0010 -#define HV_REGISTER_SIEFP 0x000A0012 -#define HV_REGISTER_SIMP 0x000A0013 -#define HV_REGISTER_EOM 0x000A0014 - -#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 -#define HV_REGISTER_STIMER0_COUNT 0x000B0001 - -union hv_msi_entry { - u64 as_uint64[2]; - struct { - u64 address; - u32 data; - u32 reserved; - } __packed; -}; - -#include <asm-generic/hyperv-tlfs.h> - -#endif diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h index 0ae427f352c8..409e239834d1 100644 --- a/arch/arm64/include/asm/hypervisor.h +++ b/arch/arm64/include/asm/hypervisor.h @@ -7,4 +7,15 @@ void kvm_init_hyp_services(void); bool kvm_arm_hyp_service_available(u32 func_id); +#ifdef CONFIG_ARM_PKVM_GUEST +void pkvm_init_hyp_services(void); +#else +static inline void pkvm_init_hyp_services(void) { }; +#endif + +static inline void kvm_arch_init_hyp_services(void) +{ + pkvm_init_hyp_services(); +}; + #endif diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index db1aeacd4cd9..e390c432f546 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -135,6 +135,12 @@ enum aarch64_insn_special_register { AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 }; +enum aarch64_insn_system_register { + AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684, + AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682, + AARCH64_INSN_SYSREG_SP_EL0 = 0x4208, +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -347,6 +353,7 @@ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) +__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) @@ -569,6 +576,11 @@ static __always_inline u32 aarch64_insn_gen_nop(void) return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); } +static __always_inline bool aarch64_insn_is_nop(u32 insn) +{ + return insn == aarch64_insn_gen_nop(); +} + u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, @@ -686,6 +698,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, + enum aarch64_insn_system_register sysreg); s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 3b694511b98f..76ebbdc6ffdd 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -17,6 +17,7 @@ #include <asm/early_ioremap.h> #include <asm/alternative.h> #include <asm/cpufeature.h> +#include <asm/rsi.h> /* * Generic IO read/write. These perform native-endian accesses. @@ -24,25 +25,29 @@ #define __raw_writeb __raw_writeb static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr) { - asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u8 __iomem *ptr = addr; + asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_writew __raw_writew static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr) { - asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u16 __iomem *ptr = addr; + asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_writel __raw_writel static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr) { - asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u32 __iomem *ptr = addr; + asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_writeq __raw_writeq static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr) { - asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u64 __iomem *ptr = addr; + asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_readb __raw_readb @@ -125,20 +130,141 @@ static __always_inline u64 __raw_readq(const volatile void __iomem *addr) #define PCI_IOBASE ((void __iomem *)PCI_IO_START) /* - * String version of I/O memory access operations. + * The ARM64 iowrite implementation is intended to support drivers that want to + * use write combining. For instance PCI drivers using write combining with a 64 + * byte __iowrite64_copy() expect to get a 64 byte MemWr TLP on the PCIe bus. + * + * Newer ARM core have sensitive write combining buffers, it is important that + * the stores be contiguous blocks of store instructions. Normal memcpy + * approaches have a very low chance to generate write combining. + * + * Since this is the only API on ARM64 that should be used with write combining + * it also integrates the DGH hint which is supposed to lower the latency to + * emit the large TLP from the CPU. */ -extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t); -extern void __memcpy_toio(volatile void __iomem *, const void *, size_t); -extern void __memset_io(volatile void __iomem *, int, size_t); -#define memset_io(c,v,l) __memset_io((c),(v),(l)) -#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l)) -#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l)) +static __always_inline void +__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from, + size_t count) +{ + switch (count) { + case 8: + asm volatile("str %w0, [%8, #4 * 0]\n" + "str %w1, [%8, #4 * 1]\n" + "str %w2, [%8, #4 * 2]\n" + "str %w3, [%8, #4 * 3]\n" + "str %w4, [%8, #4 * 4]\n" + "str %w5, [%8, #4 * 5]\n" + "str %w6, [%8, #4 * 6]\n" + "str %w7, [%8, #4 * 7]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]), + "rZ"(from[6]), "rZ"(from[7]), "r"(to)); + break; + case 4: + asm volatile("str %w0, [%4, #4 * 0]\n" + "str %w1, [%4, #4 * 1]\n" + "str %w2, [%4, #4 * 2]\n" + "str %w3, [%4, #4 * 3]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "r"(to)); + break; + case 2: + asm volatile("str %w0, [%2, #4 * 0]\n" + "str %w1, [%2, #4 * 1]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "r"(to)); + break; + case 1: + __raw_writel(*from, to); + break; + default: + BUILD_BUG(); + } +} + +void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count); + +static __always_inline void +__iowrite32_copy(void __iomem *to, const void *from, size_t count) +{ + if (__builtin_constant_p(count) && + (count == 8 || count == 4 || count == 2 || count == 1)) { + __const_memcpy_toio_aligned32(to, from, count); + dgh(); + } else { + __iowrite32_copy_full(to, from, count); + } +} +#define __iowrite32_copy __iowrite32_copy + +static __always_inline void +__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from, + size_t count) +{ + switch (count) { + case 8: + asm volatile("str %x0, [%8, #8 * 0]\n" + "str %x1, [%8, #8 * 1]\n" + "str %x2, [%8, #8 * 2]\n" + "str %x3, [%8, #8 * 3]\n" + "str %x4, [%8, #8 * 4]\n" + "str %x5, [%8, #8 * 5]\n" + "str %x6, [%8, #8 * 6]\n" + "str %x7, [%8, #8 * 7]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]), + "rZ"(from[6]), "rZ"(from[7]), "r"(to)); + break; + case 4: + asm volatile("str %x0, [%4, #8 * 0]\n" + "str %x1, [%4, #8 * 1]\n" + "str %x2, [%4, #8 * 2]\n" + "str %x3, [%4, #8 * 3]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "r"(to)); + break; + case 2: + asm volatile("str %x0, [%2, #8 * 0]\n" + "str %x1, [%2, #8 * 1]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "r"(to)); + break; + case 1: + __raw_writeq(*from, to); + break; + default: + BUILD_BUG(); + } +} + +void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count); + +static __always_inline void +__iowrite64_copy(void __iomem *to, const void *from, size_t count) +{ + if (__builtin_constant_p(count) && + (count == 8 || count == 4 || count == 2 || count == 1)) { + __const_memcpy_toio_aligned64(to, from, count); + dgh(); + } else { + __iowrite64_copy_full(to, from, count); + } +} +#define __iowrite64_copy __iowrite64_copy /* * I/O memory mapping functions. */ +typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, + pgprot_t *prot); +int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); + #define ioremap_prot ioremap_prot #define _PAGE_IOREMAP PROT_DEVICE_nGnRE @@ -182,4 +308,11 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, unsigned long flags); #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap +static inline bool arm64_is_protected_mmio(phys_addr_t phys_addr, size_t size) +{ + if (unlikely(is_realm_world())) + return __arm64_is_protected_mmio(phys_addr, size); + return false; +} + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 0a7186a93882..d4d7451c2c12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -5,7 +5,6 @@ #ifndef __ASM_IRQFLAGS_H #define __ASM_IRQFLAGS_H -#include <asm/alternative.h> #include <asm/barrier.h> #include <asm/ptrace.h> #include <asm/sysreg.h> diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 6aafbb789991..424ed421cd97 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,19 +13,30 @@ #include <linux/types.h> #include <asm/insn.h> +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\"\n\t" \ + ".align 3\n\t" \ + ".long 1b - ., " label " - .\n\t" \ + ".quad " key " - .\n\t" \ + ".popsection\n\t" + +/* This macro is also expanded on the Rust side. */ +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + "1: nop\n\t" \ + JUMP_TABLE_ENTRY(key, label) + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; + asm goto( - "1: nop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + ARCH_STATIC_BRANCH_ASM("%c0", "%l[l_yes]") + : : "i"(k) : : l_yes + ); return false; l_yes: @@ -35,15 +46,13 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; + asm goto( "1: b %l[l_yes] \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); - + JUMP_TABLE_ENTRY("%c0", "%l[l_yes]") + : : "i"(k) : : l_yes + ); return false; l_yes: return true; diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 7eefc525a9df..e1b57c13f8a4 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -17,11 +17,9 @@ asmlinkage void kasan_early_init(void); void kasan_init(void); -void kasan_copy_shadow(pgd_t *pgdir); #else static inline void kasan_init(void) { } -static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif #endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 83ddb14b95a5..fd5a08450b12 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -13,28 +13,26 @@ #include <asm/sparsemem.h> /* - * The linear mapping and the start of memory are both 2M aligned (per - * the arm64 booting.txt requirements). Hence we can use section mapping - * with 4K (section size = 2M) but not with 16K (section size = 32M) or - * 64K (section size = 512M). + * The physical and virtual addresses of the start of the kernel image are + * equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can + * use section mapping with 4K (section size = 2M) but not with 16K (section + * size = 32M) or 64K (section size = 512M). */ - -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). Note that the number of ID map translation levels - * could be increased on the fly if system RAM is out of reach for the default - * VA range, so pages required to map highest possible PA are reserved in all - * cases. - */ -#ifdef CONFIG_ARM64_4K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN +#define SWAPPER_BLOCK_SHIFT PMD_SHIFT +#define SWAPPER_SKIP_LEVEL 1 #else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_SKIP_LEVEL 0 #endif +#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT) +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL) +#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL) + +#define IDMAP_VA_BITS 48 +#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS) +#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS) /* * A relocatable kernel may execute from an address that differs from the one at @@ -50,57 +48,39 @@ #define EARLY_ENTRIES(vstart, vend, shift, add) \ (SPAN_NR_ENTRIES(vstart, vend, shift) + (add)) -#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add)) - -#if SWAPPER_PGTABLE_LEVELS > 3 -#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add)) -#else -#define EARLY_PUDS(vstart, vend, add) (0) -#endif +#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \ + (lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0) -#if SWAPPER_PGTABLE_LEVELS > 2 -#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add)) -#else -#define EARLY_PMDS(vstart, vend, add) (0) -#endif +#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \ + + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \ + + EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \ + + EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */ +#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \ + + EARLY_SEGMENT_EXTRA_PAGES)) -#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \ - + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \ - + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \ - + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE)) +#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1)) +#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE) -/* the initial ID map may need two extra pages if it needs to be extended */ -#if VA_BITS < 48 -#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE) -#else -#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) -#endif -#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1) +#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1) +#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE) -/* Initial memory map size */ -#ifdef CONFIG_ARM64_4K_PAGES -#define SWAPPER_BLOCK_SHIFT PMD_SHIFT -#define SWAPPER_BLOCK_SIZE PMD_SIZE -#define SWAPPER_TABLE_SHIFT PUD_SHIFT -#else -#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT -#define SWAPPER_BLOCK_SIZE PAGE_SIZE -#define SWAPPER_TABLE_SHIFT PMD_SHIFT -#endif +/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */ +#define KERNEL_SEGMENT_COUNT 5 +#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN +#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1) /* - * Initial memory map attributes. + * The initial ID map consists of the kernel image, mapped as two separate + * segments, and may appear misaligned wrt the swapper block size. This means + * we need 3 additional pages. The DT could straddle a swapper block boundary, + * so it may need 2. */ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN) - -#ifdef CONFIG_ARM64_4K_PAGES -#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE) -#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) +#define EARLY_IDMAP_EXTRA_PAGES 3 +#define EARLY_IDMAP_EXTRA_FDT_PAGES 2 #else -#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE) -#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) +#define EARLY_SEGMENT_EXTRA_PAGES 0 +#define EARLY_IDMAP_EXTRA_PAGES 0 +#define EARLY_IDMAP_EXTRA_FDT_PAGES 0 #endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 9ac9572a3bbe..4d9cc7a76d9c 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -80,7 +80,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } } -#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION) +#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION) extern bool crash_is_nosave(unsigned long pfn); extern void crash_prepare_suspend(void); extern void crash_post_resume(void); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3c6f8ba1e479..c2417a424b98 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,14 +102,13 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -#define HCRX_GUEST_FLAGS \ - (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \ - (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0)) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) +#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) +#define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ #define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) +#define TCR_EL2_HPD (1 << 24) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) @@ -120,7 +119,7 @@ #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK #define TCR_EL2_T0SZ_MASK 0x3f #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ - TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) /* VTCR_EL2 Registers bits */ #define VTCR_EL2_DS TCR_EL2_DS @@ -301,40 +300,17 @@ #define CPTR_EL2_TSM (1 << 12) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) -#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ +#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0)) #define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \ GENMASK(29, 21) | \ GENMASK(19, 14) | \ BIT(11)) -/* Hyp Debug Configuration Register bits */ -#define MDCR_EL2_E2TB_MASK (UL(0x3)) -#define MDCR_EL2_E2TB_SHIFT (UL(24)) -#define MDCR_EL2_HPMFZS (UL(1) << 36) -#define MDCR_EL2_HPMFZO (UL(1) << 29) -#define MDCR_EL2_MTPME (UL(1) << 28) -#define MDCR_EL2_TDCC (UL(1) << 27) -#define MDCR_EL2_HLP (UL(1) << 26) -#define MDCR_EL2_HCCD (UL(1) << 23) -#define MDCR_EL2_TTRF (UL(1) << 19) -#define MDCR_EL2_HPMD (UL(1) << 17) -#define MDCR_EL2_TPMS (UL(1) << 14) -#define MDCR_EL2_E2PB_MASK (UL(0x3)) -#define MDCR_EL2_E2PB_SHIFT (UL(12)) -#define MDCR_EL2_TDRA (UL(1) << 11) -#define MDCR_EL2_TDOSA (UL(1) << 10) -#define MDCR_EL2_TDA (UL(1) << 9) -#define MDCR_EL2_TDE (UL(1) << 8) -#define MDCR_EL2_HPME (UL(1) << 7) -#define MDCR_EL2_TPM (UL(1) << 6) -#define MDCR_EL2_TPMCR (UL(1) << 5) -#define MDCR_EL2_HPMN_MASK (UL(0x1F)) -#define MDCR_EL2_RES0 (GENMASK(63, 37) | \ - GENMASK(35, 30) | \ - GENMASK(25, 24) | \ - GENMASK(22, 20) | \ - BIT(18) | \ - GENMASK(16, 15)) +#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \ + GENMASK(27, 26) | \ + GENMASK(23, 22) | \ + GENMASK(19, 18) | \ + GENMASK(15, 0)) /* * FGT register definitions @@ -415,8 +391,6 @@ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET) -#define CPACR_EL1_TTA (1 << 28) - #define kvm_mode_names \ { PSR_MODE_EL0t, "EL0t" }, \ { PSR_MODE_EL1t, "EL1t" }, \ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..bec227f9500a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -10,6 +10,7 @@ #include <asm/hyp_image.h> #include <asm/insn.h> #include <asm/virt.h> +#include <asm/sysreg.h> #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) @@ -52,8 +53,7 @@ enum __kvm_host_smccc_func { /* Hypercalls available only prior to pKVM finalisation */ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ - __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, - __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, @@ -64,6 +64,12 @@ enum __kvm_host_smccc_func { /* Hypercalls available after pKVM finalisation */ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest, __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, @@ -73,14 +79,14 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, - __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, + __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] @@ -179,6 +185,7 @@ struct kvm_nvhe_init_params { unsigned long hcr_el2; unsigned long vttbr; unsigned long vtcr; + unsigned long tmp; }; /* @@ -234,19 +241,20 @@ extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, phys_addr_t start, unsigned long pages); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); + extern void __kvm_timer_set_cntvoff(u64 cntvoff); +extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_gic_config(void); -extern u64 __vgic_v3_read_vmcr(void); -extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); -extern u64 __kvm_get_mdcr_el2(void); - #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ " .align 3\n" \ @@ -261,7 +269,7 @@ extern u64 __kvm_get_mdcr_el2(void); asm volatile( \ " mrs %1, spsr_el2\n" \ " mrs %2, elr_el2\n" \ - "1: at "at_op", %3\n" \ + "1: " __msr_s(at_op, "%3") "\n" \ " isb\n" \ " b 9f\n" \ "2: msr spsr_el2, %1\n" \ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index b804fe832184..78ec1ef2cfe8 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -11,6 +11,7 @@ #ifndef __ARM64_KVM_EMULATE_H__ #define __ARM64_KVM_EMULATE_H__ +#include <linux/bitfield.h> #include <linux/kvm_host.h> #include <asm/debug-monitors.h> @@ -55,6 +56,14 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) | + ESR_ELx_IL; + + kvm_inject_nested_sync(vcpu, esr); +} + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -69,39 +78,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (has_vhe() || has_hvhe()) - vcpu->arch.hcr_el2 |= HCR_E2H; - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) { - /* route synchronous external abort exceptions to EL2 */ - vcpu->arch.hcr_el2 |= HCR_TEA; - /* trap error record accesses */ - vcpu->arch.hcr_el2 |= HCR_TERR; - } + if (!vcpu_has_run_once(vcpu)) + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) { - vcpu->arch.hcr_el2 |= HCR_FWB; - } else { - /* - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C - * get set in SCTLR_EL1 such that we can detect when the guest - * MMU gets turned on and do the necessary cache maintenance - * then. - */ + /* + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C + * get set in SCTLR_EL1 such that we can detect when the guest + * MMU gets turned on and do the necessary cache maintenance + * then. + */ + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) vcpu->arch.hcr_el2 |= HCR_TVM; - } - - if (cpus_have_final_cap(ARM64_HAS_EVT) && - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) - vcpu->arch.hcr_el2 |= HCR_TID4; - else - vcpu->arch.hcr_el2 |= HCR_TID2; - - if (vcpu_el1_is_32bit(vcpu)) - vcpu->arch.hcr_el2 &= ~HCR_RW; - - if (kvm_has_mte(vcpu->kvm)) - vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) @@ -125,16 +112,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); -} - -static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; @@ -207,28 +184,30 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu) return vcpu_is_el2_ctxt(&vcpu->arch.ctxt); } -static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt) -{ - return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H; -} - static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu) { - return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt); -} - -static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt) -{ - return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE; + return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) || + (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H)); } static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) { - return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt); + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE; } -static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) +static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) { + bool e2h, tge; + u64 hcr; + + if (!vcpu_has_nv(vcpu)) + return false; + + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); + + e2h = (hcr & HCR_E2H); + tge = (hcr & HCR_TGE); + /* * We are in a hypervisor context if the vcpu mode is EL2 or * E2H and TGE bits are set. The latter means we are in the user space @@ -237,14 +216,12 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the * rest of the KVM code, and will result in a misbehaving guest. */ - return vcpu_is_el2_ctxt(ctxt) || - (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) || - __vcpu_el2_tge_is_set(ctxt); + return vcpu_is_el2(vcpu) || (e2h && tge) || tge; } -static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) +static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu) { - return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt); + return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu); } /* @@ -425,15 +402,9 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case ESR_ELx_FSC_EXTABT: - case ESR_ELx_FSC_SEA_TTW0: - case ESR_ELx_FSC_SEA_TTW1: - case ESR_ELx_FSC_SEA_TTW2: - case ESR_ELx_FSC_SEA_TTW3: + case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3): case ESR_ELx_FSC_SECC: - case ESR_ELx_FSC_SECC_TTW0: - case ESR_ELx_FSC_SECC_TTW1: - case ESR_ELx_FSC_SECC_TTW2: - case ESR_ELx_FSC_SECC_TTW3: + case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3): return true; default: return false; @@ -572,48 +543,110 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) -static __always_inline void kvm_write_cptr_el2(u64 val) -{ - if (has_vhe() || has_hvhe()) - write_sysreg(val, cpacr_el1); - else - write_sysreg(val, cptr_el2); -} +#define __build_check_all_or_none(r, bits) \ + BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits)) + +#define __cpacr_to_cptr_clr(clr, set) \ + ({ \ + u64 cptr = 0; \ + \ + if ((set) & CPACR_EL1_FPEN) \ + cptr |= CPTR_EL2_TFP; \ + if ((set) & CPACR_EL1_ZEN) \ + cptr |= CPTR_EL2_TZ; \ + if ((set) & CPACR_EL1_SMEN) \ + cptr |= CPTR_EL2_TSM; \ + if ((clr) & CPACR_EL1_TTA) \ + cptr |= CPTR_EL2_TTA; \ + if ((clr) & CPTR_EL2_TAM) \ + cptr |= CPTR_EL2_TAM; \ + if ((clr) & CPTR_EL2_TCPAC) \ + cptr |= CPTR_EL2_TCPAC; \ + \ + cptr; \ + }) + +#define __cpacr_to_cptr_set(clr, set) \ + ({ \ + u64 cptr = 0; \ + \ + if ((clr) & CPACR_EL1_FPEN) \ + cptr |= CPTR_EL2_TFP; \ + if ((clr) & CPACR_EL1_ZEN) \ + cptr |= CPTR_EL2_TZ; \ + if ((clr) & CPACR_EL1_SMEN) \ + cptr |= CPTR_EL2_TSM; \ + if ((set) & CPACR_EL1_TTA) \ + cptr |= CPTR_EL2_TTA; \ + if ((set) & CPTR_EL2_TAM) \ + cptr |= CPTR_EL2_TAM; \ + if ((set) & CPTR_EL2_TCPAC) \ + cptr |= CPTR_EL2_TCPAC; \ + \ + cptr; \ + }) + +#define cpacr_clear_set(clr, set) \ + do { \ + BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \ + BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \ + __build_check_all_or_none((clr), CPACR_EL1_FPEN); \ + __build_check_all_or_none((set), CPACR_EL1_FPEN); \ + __build_check_all_or_none((clr), CPACR_EL1_ZEN); \ + __build_check_all_or_none((set), CPACR_EL1_ZEN); \ + __build_check_all_or_none((clr), CPACR_EL1_SMEN); \ + __build_check_all_or_none((set), CPACR_EL1_SMEN); \ + \ + if (has_vhe() || has_hvhe()) \ + sysreg_clear_set(cpacr_el1, clr, set); \ + else \ + sysreg_clear_set(cptr_el2, \ + __cpacr_to_cptr_clr(clr, set), \ + __cpacr_to_cptr_set(clr, set));\ + } while (0) -static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) +/* + * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE + * format if E2H isn't set. + */ +static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu) { - u64 val; + u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2); - if (has_vhe()) { - val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | - CPACR_EL1_ZEN_EL1EN); - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - } else if (has_hvhe()) { - val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); + if (!vcpu_el2_e2h_is_set(vcpu)) + cptr = translate_cptr_el2_to_cpacr_el1(cptr); - if (!vcpu_has_sve(vcpu) || - (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) - val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; - } else { - val = CPTR_NVHE_EL2_RES1; + return cptr; +} - if (vcpu_has_sve(vcpu) && - (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) - val |= CPTR_EL2_TZ; - if (cpus_have_final_cap(ARM64_SME)) - val &= ~CPTR_EL2_TSM; +static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu, + unsigned int xen) +{ + switch (xen) { + case 0b00: + case 0b10: + return true; + case 0b01: + return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu); + case 0b11: + default: + return false; } - - return val; } -static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) +#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \ + (!vcpu_has_nv(vcpu) ? false : \ + ____cptr_xen_trap_enabled(vcpu, \ + SYS_FIELD_GET(CPACR_EL1, xen, \ + vcpu_sanitised_cptr_el2(vcpu)))) + +static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu) { - u64 val = kvm_get_reset_cptr_el2(vcpu); + return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN); +} - kvm_write_cptr_el2(val); +static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu) +{ + return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN); } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 21c57b812569..d919557af5e5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -51,6 +51,7 @@ #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7) +#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -73,9 +74,8 @@ enum kvm_mode kvm_get_mode(void); static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; #endif -DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); - extern unsigned int __ro_after_init kvm_sve_max_vl; +extern unsigned int __ro_after_init kvm_host_sve_max_vl; int __init kvm_arm_init_sve(void); u32 __attribute_const__ kvm_target_cpu(void); @@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); struct kvm_hyp_memcache { phys_addr_t head; unsigned long nr_pages; + struct pkvm_mapping *mapping; /* only used from EL1 */ }; static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, @@ -99,7 +100,7 @@ static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, void *(*to_va)(phys_addr_t phys)) { - phys_addr_t *p = to_va(mc->head); + phys_addr_t *p = to_va(mc->head & PAGE_MASK); if (!mc->nr_pages) return NULL; @@ -188,6 +189,39 @@ struct kvm_s2_mmu { uint64_t split_page_chunk_size; struct kvm_arch *arch; + + /* + * For a shadow stage-2 MMU, the virtual vttbr used by the + * host to parse the guest S2. + * This either contains: + * - the virtual VTTBR programmed by the guest hypervisor with + * CnP cleared + * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid + * + * We also cache the full VTCR which gets used for TLB invalidation, + * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted + * to be cached in a TLB" to the letter. + */ + u64 tlb_vttbr; + u64 tlb_vtcr; + + /* + * true when this represents a nested context where virtual + * HCR_EL2.VM == 1 + */ + bool nested_stage2_enabled; + + /* + * true when this MMU needs to be unmapped before being used for a new + * purpose. + */ + bool pending_unmap; + + /* + * 0: Nobody is currently using this, check vttbr for validity + * >0: Somebody is actively using this. + */ + atomic_t refcnt; }; struct kvm_arch_memory_slot { @@ -211,6 +245,7 @@ typedef unsigned int pkvm_handle_t; struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; + bool enabled; }; struct kvm_mpidr_data { @@ -220,27 +255,48 @@ struct kvm_mpidr_data { static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) { - unsigned long mask = data->mpidr_mask; - u64 aff = mpidr & MPIDR_HWID_BITMASK; - int nbits, bit, bit_idx = 0; - u16 index = 0; + unsigned long index = 0, mask = data->mpidr_mask; + unsigned long aff = mpidr & MPIDR_HWID_BITMASK; - /* - * If this looks like RISC-V's BEXT or x86's PEXT - * instructions, it isn't by accident. - */ - nbits = fls(mask); - for_each_set_bit(bit, &mask, nbits) { - index |= (aff & BIT(bit)) >> (bit - bit_idx); - bit_idx++; - } + bitmap_gather(&index, &aff, &mask, fls(mask)); return index; } +struct kvm_sysreg_masks; + +enum fgt_group_id { + __NO_FGT_GROUP__, + HFGxTR_GROUP, + HDFGRTR_GROUP, + HDFGWTR_GROUP = HDFGRTR_GROUP, + HFGITR_GROUP, + HAFGRTR_GROUP, + + /* Must be last */ + __NR_FGT_GROUP_IDS__ +}; + struct kvm_arch { struct kvm_s2_mmu mmu; + /* + * Fine-Grained UNDEF, mimicking the FGT layout defined by the + * architecture. We track them globally, as we present the + * same feature-set to all vcpus. + * + * Index 0 is currently spare. + */ + u64 fgu[__NR_FGT_GROUP_IDS__]; + + /* + * Stage 2 paging state for VMs with nested S2 using a virtual + * VMID. + */ + struct kvm_s2_mmu *nested_mmus; + size_t nested_mmus_size; + int nested_mmus_next; + /* Interrupt controller */ struct vgic_dist vgic; @@ -274,6 +330,10 @@ struct kvm_arch { #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 /* Initial ID reg values loaded */ #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 + /* Fine-Grained UNDEF initialised */ +#define KVM_ARCH_FLAG_FGU_INITIALIZED 8 + /* SVE exposed to guest */ +#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9 unsigned long flags; /* VM-wide vCPU feature set */ @@ -294,6 +354,9 @@ struct kvm_arch { /* PMCR_EL0.N value for the guest */ u8 pmcr_n; + /* Iterator for idreg debugfs */ + u8 idreg_debugfs_iter; + /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; @@ -307,11 +370,14 @@ struct kvm_arch { * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) -#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) -#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)]) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; + u64 ctr_el0; + + /* Masks for VNCR-backed and general EL2 sysregs */ + struct kvm_sysreg_masks *sysreg_masks; + /* * For an untrusted host VM, 'pkvm.handle' is used to lookup * the associated pKVM instance in the hypervisor. @@ -343,6 +409,9 @@ struct kvm_vcpu_fault_info { r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ __after_##r = __MAX__(__before_##r - 1, r) +#define MARKER(m) \ + m, __after_##m = m - 1 + enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ @@ -388,6 +457,12 @@ enum vcpu_sysreg { GCR_EL1, /* Tag Control Register */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + POR_EL0, /* Permission Overlay Register 0 (EL0) */ + + /* FP/SIMD/SVE */ + SVCR, + FPMR, + /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -397,12 +472,15 @@ enum vcpu_sysreg { /* EL2 registers */ SCTLR_EL2, /* System Control Register (EL2) */ ACTLR_EL2, /* Auxiliary Control Register (EL2) */ - MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ + ZCR_EL2, /* SVE Control Register (EL2) */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ + PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */ + PIR_EL2, /* Permission Indirection Register 1 (EL2) */ + POR_EL2, /* Permission Overlay Register 2 (EL2) */ SPSR_EL2, /* EL2 saved program status register */ ELR_EL2, /* EL2 exception link register */ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ @@ -415,14 +493,20 @@ enum vcpu_sysreg { VBAR_EL2, /* Vector Base Address Register (EL2) */ RVBAR_EL2, /* Reset Vector Base Address Register */ CONTEXTIDR_EL2, /* Context ID Register (EL2) */ - CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ CNTHP_CTL_EL2, CNTHP_CVAL_EL2, CNTHV_CTL_EL2, CNTHV_CVAL_EL2, - __VNCR_START__, /* Any VNCR-capable reg goes after this point */ + /* Anything from this can be RES0/RES1 sanitised */ + MARKER(__SANITISED_REG_START__), + TCR2_EL2, /* Extended Translation Control Register (EL2) */ + MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ + CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ + + /* Any VNCR-capable reg goes after this point */ + MARKER(__VNCR_START__), VNCR(SCTLR_EL1),/* System Control Register */ VNCR(ACTLR_EL1),/* Auxiliary Control Register */ @@ -458,6 +542,8 @@ enum vcpu_sysreg { VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ + VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */ + VNCR(HFGRTR_EL2), VNCR(HFGWTR_EL2), VNCR(HFGITR_EL2), @@ -471,9 +557,18 @@ enum vcpu_sysreg { VNCR(CNTP_CVAL_EL0), VNCR(CNTP_CTL_EL0), + VNCR(ICH_HCR_EL2), + NR_SYS_REGS /* Nothing after this line! */ }; +struct kvm_sysreg_masks { + struct { + u64 res0; + u64 res1; + } mask[NR_SYS_REGS - __SANITISED_REG_START__]; +}; + struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -492,8 +587,80 @@ struct kvm_cpu_context { u64 *vncr_array; }; +struct cpu_sve_state { + __u64 zcr_el1; + + /* + * Ordering is important since __sve_save_state/__sve_restore_state + * relies on it. + */ + __u32 fpsr; + __u32 fpcr; + + /* Must be SVE_VQ_BYTES (128 bit) aligned. */ + __u8 sve_regs[]; +}; + +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. + */ struct kvm_host_data { +#define KVM_HOST_DATA_FLAG_HAS_SPE 0 +#define KVM_HOST_DATA_FLAG_HAS_TRBE 1 +#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 +#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 + unsigned long flags; + struct kvm_cpu_context host_ctxt; + + /* + * Hyp VA. + * sve_state is only used in pKVM and if system_supports_sve(). + */ + struct cpu_sve_state *sve_state; + + /* Used by pKVM only. */ + u64 fpmr; + + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; + } host_debug_state; + + /* Guest trace filter value */ + u64 trfcr_while_in_guest; + + /* Number of programmable event counters (PMCR_EL0.N) for this CPU */ + unsigned int nr_event_counters; + + /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */ + unsigned int debug_brps; + unsigned int debug_wrps; }; struct kvm_host_psci_config { @@ -542,29 +709,18 @@ struct kvm_vcpu_arch { void *sve_state; enum fp_type fp_type; unsigned int sve_max_vl; - u64 svcr; /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; /* Values of trap registers for the guest. */ u64 hcr_el2; + u64 hcrx_el2; u64 mdcr_el2; - u64 cptr_el2; - - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Ownership of the FP regs */ - enum { - FP_STATE_FREE, - FP_STATE_HOST_OWNED, - FP_STATE_GUEST_OWNED, - } fp_state; - /* Configuration flags, set once and for all before the vcpu can run */ u8 cflags; @@ -587,48 +743,26 @@ struct kvm_vcpu_arch { * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. * - * debug_ptr points to the set of debug registers that should be loaded - * onto the hardware when running the guest. + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. */ - struct kvm_guest_debug_arch *debug_ptr; struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; + u64 external_mdscr_el1; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ - struct task_struct *parent_task; - - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; + enum { + VCPU_DEBUG_FREE, + VCPU_DEBUG_HOST_OWNED, + VCPU_DEBUG_GUEST_OWNED, + } debug_owner; /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; struct kvm_pmu pmu; - /* - * Guest registers we preserve during guest debugging. - * - * These shadow registers are updated by the kvm_handle_sys_reg - * trap handler if the guest accesses or updates them while we - * are using guest debug. - */ - struct { - u32 mdscr_el1; - bool pstate_ss; - } guest_debug_preserved; - /* vcpu power state */ struct kvm_mp_state mp_state; spinlock_t mp_state_lock; @@ -636,6 +770,9 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; + /* Pages to top-up the pKVM/EL2 guest pool */ + struct kvm_hyp_memcache pkvm_memcache; + /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; @@ -728,14 +865,10 @@ struct kvm_vcpu_arch { #define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__) #define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__) -/* SVE exposed to guest */ -#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0)) +/* KVM_ARM_VCPU_INIT completed */ +#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0)) /* SVE config completed */ #define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1)) -/* PTRAUTH exposed to guest */ -#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2)) -/* KVM_ARM_VCPU_INIT completed */ -#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3)) /* Exception pending */ #define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0)) @@ -771,31 +904,21 @@ struct kvm_vcpu_arch { #define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5) #define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6) #define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7) -/* Guest debug is live */ -#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4)) -/* Save SPE context if active */ -#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) -/* Save TRBE context if active */ -#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) -/* vcpu running in HYP context */ -#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7)) - -/* SVE enabled for host EL0 */ -#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) -/* SME enabled for EL0 */ -#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1)) + /* Physical CPU not in supported_cpus */ -#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2)) +#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0)) /* WFIT instruction trapped */ -#define IN_WFIT __vcpu_single_flag(sflags, BIT(3)) +#define IN_WFIT __vcpu_single_flag(sflags, BIT(1)) /* vcpu system registers loaded on physical CPU */ -#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) -/* Software step state is Active-pending */ -#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) +#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2)) +/* Software step state is Active-pending for external debug */ +#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3)) +/* Software step state is Active pending for guest debug */ +#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4)) /* PMUSERENR for the guest EL0 is on physical CPU */ -#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6)) +#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5)) /* WFI instruction trapped */ -#define IN_WFI __vcpu_single_flag(sflags, BIT(7)) +#define IN_WFI __vcpu_single_flag(sflags, BIT(6)) /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -804,6 +927,9 @@ struct kvm_vcpu_arch { #define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) +#define vcpu_sve_zcr_elx(vcpu) \ + (unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1) + #define vcpu_sve_state_size(vcpu) ({ \ size_t __size_ret; \ unsigned int __vcpu_vq; \ @@ -823,14 +949,21 @@ struct kvm_vcpu_arch { KVM_GUESTDBG_USE_HW | \ KVM_GUESTDBG_SINGLESTEP) -#define vcpu_has_sve(vcpu) (system_supports_sve() && \ - vcpu_get_flag(vcpu, GUEST_HAS_SVE)) +#define kvm_has_sve(kvm) (system_supports_sve() && \ + test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags)) + +#ifdef __KVM_NVHE_HYPERVISOR__ +#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm)) +#else +#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm) +#endif #ifdef CONFIG_ARM64_PTR_AUTH #define vcpu_has_ptrauth(vcpu) \ ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \ - vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH)) + (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \ + vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))) #else #define vcpu_has_ptrauth(vcpu) false #endif @@ -856,7 +989,7 @@ struct kvm_vcpu_arch { * Don't bother with VNCR-based accesses in the nVHE code, it has no * business dealing with NV. */ -static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) { #if !defined (__KVM_NVHE_HYPERVISOR__) if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && @@ -866,9 +999,24 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) return (u64 *)&ctxt->sys_regs[r]; } +#define __ctxt_sys_reg(c,r) \ + ({ \ + BUILD_BUG_ON(__builtin_constant_p(r) && \ + (r) >= NR_SYS_REGS); \ + ___ctxt_sys_reg(c, r); \ + }) + #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) -#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) +u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); +#define __vcpu_sys_reg(v,r) \ + (*({ \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\ + __r; \ + })) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -895,6 +1043,10 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break; + case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break; + case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break; + case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break; case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; @@ -913,6 +1065,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; default: return false; } @@ -940,6 +1093,10 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; + case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; + case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; + case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; @@ -958,6 +1115,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; default: return false; } @@ -996,7 +1154,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid) +#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid)) #ifndef __KVM_NVHE_HYPERVISOR__ #define kvm_call_hyp_nvhe(f, ...) \ @@ -1055,14 +1213,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); +void kvm_sys_regs_create_debugfs(struct kvm *kvm); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +struct sys_reg_desc; +int __init populate_sysreg_config(const struct sys_reg_desc *sr, + unsigned int idx); int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); +void kvm_calculate_traps(struct kvm_vcpu *vcpu); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); @@ -1095,7 +1259,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, extern unsigned int __ro_after_init kvm_arm_vmid_bits; int __init kvm_arm_vmid_alloc_init(void); void __init kvm_arm_vmid_alloc_free(void); -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) @@ -1114,6 +1278,51 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. + * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + +#define host_data_test_flag(flag) \ + (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))) +#define host_data_set_flag(flag) \ + set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)) +#define host_data_clear_flag(flag) \ + clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)) + +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; +} + +/* Check whether the FP regs are owned by the host */ +static inline bool host_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ @@ -1126,17 +1335,23 @@ static inline bool kvm_system_needs_idmapped_vectors(void) } static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} -void kvm_arm_init_debug(void); -void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); -void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); -void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); -void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +void kvm_init_host_debug_data(void); +void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu); +void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu); +void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val); #define kvm_vcpu_os_lock_enabled(vcpu) \ (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK)) +#define kvm_debug_regs_in_use(vcpu) \ + ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE) +#define kvm_host_owns_debug_regs(vcpu) \ + ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED) +#define kvm_guest_owns_debug_regs(vcpu) \ + ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED) + int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, @@ -1157,34 +1372,36 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { return (!has_vhe() && attr->exclude_host); } -/* Flags for host debug state */ -void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu); -void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); - #ifdef CONFIG_KVM -void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); -void kvm_clr_pmu_events(u32 clr); +void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr); +void kvm_clr_pmu_events(u64 clr); bool kvm_set_pmuserenr(u64 val); +void kvm_enable_trbe(void); +void kvm_disable_trbe(void); +void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest); #else -static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} -static inline void kvm_clr_pmu_events(u32 clr) {} +static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {} +static inline void kvm_clr_pmu_events(u64 clr) {} static inline bool kvm_set_pmuserenr(u64 val) { return false; } +static inline void kvm_enable_trbe(void) {} +static inline void kvm_disable_trbe(void) {} +static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {} #endif void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu); int __init kvm_set_ipa_limit(void); +u32 kvm_get_pa_bits(struct kvm *kvm); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); @@ -1193,10 +1410,9 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -static inline bool kvm_vm_is_protected(struct kvm *kvm) -{ - return false; -} +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) + +#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); @@ -1219,8 +1435,11 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) return test_bit(feature, ka->vcpu_features); } +#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) +#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; @@ -1233,4 +1452,95 @@ static inline void kvm_hyp_reserve(void) { } void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); +static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) +{ + switch (reg) { + case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7): + return &ka->id_regs[IDREG_IDX(reg)]; + case SYS_CTR_EL0: + return &ka->ctr_el0; + default: + WARN_ON_ONCE(1); + return NULL; + } +} + +#define kvm_read_vm_id_reg(kvm, reg) \ + ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; }) + +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); + +#define __expand_field_sign_unsigned(id, fld, val) \ + ((u64)SYS_FIELD_VALUE(id, fld, val)) + +#define __expand_field_sign_signed(id, fld, val) \ + ({ \ + u64 __val = SYS_FIELD_VALUE(id, fld, val); \ + sign_extend64(__val, id##_##fld##_WIDTH - 1); \ + }) + +#define get_idreg_field_unsigned(kvm, id, fld) \ + ({ \ + u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \ + FIELD_GET(id##_##fld##_MASK, __val); \ + }) + +#define get_idreg_field_signed(kvm, id, fld) \ + ({ \ + u64 __val = get_idreg_field_unsigned(kvm, id, fld); \ + sign_extend64(__val, id##_##fld##_WIDTH - 1); \ + }) + +#define get_idreg_field_enum(kvm, id, fld) \ + get_idreg_field_unsigned(kvm, id, fld) + +#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \ + (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit)) + +#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \ + (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit)) + +#define kvm_cmp_feat(kvm, id, fld, op, limit) \ + (id##_##fld##_SIGNED ? \ + kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \ + kvm_cmp_feat_unsigned(kvm, id, fld, op, limit)) + +#define kvm_has_feat(kvm, id, fld, limit) \ + kvm_cmp_feat(kvm, id, fld, >=, limit) + +#define kvm_has_feat_enum(kvm, id, fld, val) \ + kvm_cmp_feat_unsigned(kvm, id, fld, ==, val) + +#define kvm_has_feat_range(kvm, id, fld, min, max) \ + (kvm_cmp_feat(kvm, id, fld, >=, min) && \ + kvm_cmp_feat(kvm, id, fld, <=, max)) + +/* Check for a given level of PAuth support */ +#define kvm_has_pauth(k, l) \ + ({ \ + bool pa, pi, pa3; \ + \ + pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \ + pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \ + pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \ + pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \ + pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \ + pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \ + \ + (pa + pi + pa3) == 1; \ + }) + +#define kvm_has_fpmr(k) \ + (system_supports_fpmr() && \ + kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP)) + +#define kvm_has_tcr2(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP)) + +#define kvm_has_s1pie(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP)) + +#define kvm_has_s1poe(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 145ce73fc16c..c838309e4ec4 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the * static inline can allow the compiler to out-of-line this. KVM always wants - * the macro version as its always inlined. + * the macro version as it's always inlined. */ #define __kvm_swab32(x) ___constant_swab32(x) @@ -80,8 +80,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ @@ -111,7 +111,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); -void __sve_restore_state(void *sve_pffr, u32 *fpsr); +void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr); +void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr); u64 __guest_enter(struct kvm_vcpu *vcpu); @@ -123,8 +124,8 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, #endif #ifdef __KVM_NVHE_HYPERVISOR__ -void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size, - phys_addr_t pgd, void *sp, void *cont_fn); +void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, + void (*fn)(void)); int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, unsigned long *per_cpu_base, u32 hyp_va_bits); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); @@ -142,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val); extern unsigned long kvm_nvhe_sym(__icache_flags); extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); +extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e3e793d0ec30..b98ac6aa631f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -54,27 +54,6 @@ #include <asm/alternative.h> /* - * Convert a kernel VA into a HYP VA. - * reg: VA to be converted. - * - * The actual code generation takes place in kvm_update_va_mask, and - * the instructions below are only there to reserve the space and - * perform the register allocation (kvm_update_va_mask uses the - * specific registers encoded in the instructions). - */ -.macro kern_hyp_va reg -#ifndef __KVM_VHE_HYPERVISOR__ -alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask - and \reg, \reg, #1 /* mask with va_mask */ - ror \reg, \reg, #1 /* rotate to the first tag bit */ - add \reg, \reg, #0 /* insert the low 12 bits of the tag */ - add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ - ror \reg, \reg, #63 /* rotate back */ -alternative_cb_end -#endif -.endm - -/* * Convert a hypervisor VA to a PA * reg: hypervisor address to be converted in place * tmp: temporary register @@ -119,6 +98,7 @@ alternative_cb_end #include <asm/mmu_context.h> #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> +#include <asm/kvm_nested.h> void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -127,14 +107,29 @@ void kvm_apply_hyp_relocations(void); #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset) +/* + * Convert a kernel VA into a HYP VA. + * + * Can be called from hyp or non-hyp context. + * + * The actual code generation takes place in kvm_update_va_mask(), and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_va_mask() uses the + * specific registers encoded in the instructions). + */ static __always_inline unsigned long __kern_hyp_va(unsigned long v) { +/* + * This #ifndef is an optimisation for when this is called from VHE hyp + * context. When called from a VHE non-hyp context, kvm_update_va_mask() will + * replace the instructions with `nop`s. + */ #ifndef __KVM_VHE_HYPERVISOR__ - asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" - "ror %0, %0, #1\n" - "add %0, %0, #0\n" - "add %0, %0, #0, lsl 12\n" - "ror %0, %0, #63\n", + asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */ + "ror %0, %0, #1\n" /* rotate to the first tag bit */ + "add %0, %0, #0\n" /* insert the low 12 bits of the tag */ + "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */ + "ror %0, %0, #63\n", /* rotate back */ ARM64_ALWAYS_SYSTEM, kvm_update_va_mask) : "+r" (v)); @@ -144,6 +139,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +extern u32 __hyp_va_bits; + /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. @@ -171,6 +168,11 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); void __init free_hyp_pgds(void); +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, + u64 size, bool may_block); +void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); +void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); + void stage2_unmap_vm(struct kvm *kvm); int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_uninit_stage2_mmu(struct kvm *kvm); @@ -332,5 +334,48 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); } + +static inline u64 get_vmid(u64 vttbr) +{ + return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >> + VTTBR_VMID_SHIFT; +} + +static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu) +{ + return !(mmu->tlb_vttbr & VTTBR_CNP_BIT); +} + +static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +{ + /* + * Be careful, mmu may not be fully initialised so do look at + * *any* of its fields. + */ + return &kvm->arch.mmu != mmu; +} + +static inline void kvm_fault_lock(struct kvm *kvm) +{ + if (is_protected_kvm_enabled()) + write_lock(&kvm->mmu_lock); + else + read_lock(&kvm->mmu_lock); +} + +static inline void kvm_fault_unlock(struct kvm *kvm) +{ + if (is_protected_kvm_enabled()) + write_unlock(&kvm->mmu_lock); + else + read_unlock(&kvm->mmu_lock); +} + +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); +#else +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 4882905357f4..56c4bcd35e2e 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -5,6 +5,7 @@ #include <linux/bitfield.h> #include <linux/kvm_host.h> #include <asm/kvm_emulate.h> +#include <asm/kvm_pgtable.h> static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { @@ -32,14 +33,16 @@ static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) { - u64 cpacr_el1 = 0; + u64 cpacr_el1 = CPACR_EL1_RES1; if (cptr_el2 & CPTR_EL2_TTA) - cpacr_el1 |= CPACR_ELx_TTA; + cpacr_el1 |= CPACR_EL1_TTA; if (!(cptr_el2 & CPTR_EL2_TFP)) - cpacr_el1 |= CPACR_ELx_FPEN; + cpacr_el1 |= CPACR_EL1_FPEN; if (!(cptr_el2 & CPTR_EL2_TZ)) - cpacr_el1 |= CPACR_ELx_ZEN; + cpacr_el1 |= CPACR_EL1_ZEN; + + cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM); return cpacr_el1; } @@ -60,8 +63,185 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) return ttbr0 & ~GENMASK_ULL(63, 48); } -extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); +extern bool forward_smc_trap(struct kvm_vcpu *vcpu); +extern bool forward_debug_exception(struct kvm_vcpu *vcpu); +extern void kvm_init_nested(struct kvm *kvm); +extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu); +extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu); +extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu); + +union tlbi_info; + +extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid, + const union tlbi_info *info, + void (*)(struct kvm_s2_mmu *, + const union tlbi_info *)); +extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); +extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); + +extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); + +struct kvm_s2_trans { + phys_addr_t output; + unsigned long block_size; + bool writable; + bool readable; + int level; + u32 esr; + u64 desc; +}; + +static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans) +{ + return trans->output; +} + +static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans) +{ + return trans->block_size; +} + +static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans) +{ + return trans->esr; +} + +static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans) +{ + return trans->readable; +} + +static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans) +{ + return trans->writable; +} + +static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) +{ + return !(trans->desc & BIT(54)); +} + +extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, + struct kvm_s2_trans *result); +extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, + struct kvm_s2_trans *trans); +extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2); +extern void kvm_nested_s2_wp(struct kvm *kvm); +extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block); +extern void kvm_nested_s2_flush(struct kvm *kvm); + +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val); + +static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); -int kvm_init_nv_sysregs(struct kvm *kvm); + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL1)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + +static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL2)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + +int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu); + +#ifdef CONFIG_ARM64_PTR_AUTH +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr); +#else +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) +{ + /* We really should never execute this... */ + WARN_ON_ONCE(1); + *elr = 0xbad9acc0debadbad; + return false; +} +#endif + +#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0) + +static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans) +{ + return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level); +} + +/* Adjust alignment for the contiguous bit as per StageOA() */ +#define contiguous_bit_shift(d, wi, l) \ + ({ \ + u8 shift = 0; \ + \ + if ((d) & PTE_CONT) { \ + switch (BIT((wi)->pgshift)) { \ + case SZ_4K: \ + shift = 4; \ + break; \ + case SZ_16K: \ + shift = (l) == 2 ? 5 : 7; \ + break; \ + case SZ_64K: \ + shift = 5; \ + break; \ + } \ + } \ + \ + shift; \ + }) + +static inline unsigned int ps_to_output_size(unsigned int ps) +{ + switch (ps) { + case 0: return 32; + case 1: return 36; + case 2: return 40; + case 3: return 42; + case 4: return 44; + case 5: + default: + return 48; + } +} #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index cfdf40f734b1..6b9d274052c7 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -59,6 +59,48 @@ typedef u64 kvm_pte_t; #define KVM_PHYS_INVALID (-1ULL) +#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) + +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; }) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; }) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) + +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) + +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) + +#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ + KVM_PTE_LEAF_ATTR_HI_S2_XN) + +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) +#define KVM_MAX_OWNER_ID 1 + +/* + * Used to indicate a pte for which a 'break-before-make' sequence is in + * progress. + */ +#define KVM_INVALID_PTE_LOCKED BIT(10) + static inline bool kvm_pte_valid(kvm_pte_t pte) { return pte & KVM_PTE_VALID; @@ -197,6 +239,7 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_W: Write permission. * @KVM_PGTABLE_PROT_R: Read permission. * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes. * @KVM_PGTABLE_PROT_SW0: Software bit 0. * @KVM_PGTABLE_PROT_SW1: Software bit 1. * @KVM_PGTABLE_PROT_SW2: Software bit 2. @@ -208,6 +251,7 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + KVM_PGTABLE_PROT_NORMAL_NC = BIT(4), KVM_PGTABLE_PROT_SW0 = BIT(55), KVM_PGTABLE_PROT_SW1 = BIT(56), @@ -368,15 +412,20 @@ static inline bool kvm_pgtable_walk_lock_held(void) * be used instead of block mappings. */ struct kvm_pgtable { - u32 ia_bits; - s8 start_level; - kvm_pteref_t pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; - kvm_pgtable_force_pte_cb_t force_pte_cb; + union { + struct rb_root pkvm_mappings; + struct { + u32 ia_bits; + s8 start_level; + kvm_pteref_t pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; + }; + }; + struct kvm_s2_mmu *mmu; }; /** @@ -482,8 +531,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, enum kvm_pgtable_stage2_flags flags, kvm_pgtable_force_pte_cb_t force_pte_cb); -#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \ - __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL) +static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) +{ + return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL); +} /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. @@ -625,15 +677,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored. * * If there is a valid, leaf page-table entry used to translate @addr, then * set the access flag in that entry. - * - * Return: The old page-table entry prior to setting the flag, 0 on failure. */ -kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); +void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access @@ -663,6 +715,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * @prot: Additional permissions to grant for the mapping. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored. * @@ -675,7 +728,8 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, * Return: 0 on success, negative error code on failure. */ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot); + enum kvm_pgtable_prot prot, + enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ad9cfb5c1ff4..eb65f12e81d9 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -20,6 +20,31 @@ int pkvm_init_host_vm(struct kvm *kvm); int pkvm_create_hyp_vm(struct kvm *kvm); void pkvm_destroy_hyp_vm(struct kvm *kvm); +/* + * This functions as an allow-list of protected VM capabilities. + * Features not explicitly allowed by this function are denied. + */ +static inline bool kvm_pvm_ext_allowed(long ext) +{ + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: + case KVM_CAP_MSI_DEVID: + case KVM_CAP_ARM_VM_IPA_SIZE: + case KVM_CAP_ARM_PMU_V3: + case KVM_CAP_ARM_SVE: + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + return true; + default: + return false; + } +} + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); @@ -128,4 +153,39 @@ static inline unsigned long hyp_ffa_proxy_pages(void) return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE); } +static inline size_t pkvm_host_sve_state_size(void) +{ + if (!system_supports_sve()) + return 0; + + return size_add(sizeof(struct cpu_sve_state), + SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl))); +} + +struct pkvm_mapping { + struct rb_node node; + u64 gfn; + u64 pfn; +}; + +int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops); +void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot, void *mc, + enum kvm_pgtable_walk_flags flags); +int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); +int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); +int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); +bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold); +int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, + enum kvm_pgtable_walk_flags flags); +void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_walk_flags flags); +int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct kvm_mmu_memory_cache *mc); +void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); +kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, + enum kvm_pgtable_prot prot, void *mc, + bool force_pte); #endif /* __ARM64_KVM_PKVM_H__ */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 0cd0965255d2..6199c9f7ec6e 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -99,5 +99,26 @@ alternative_else_nop_endif .macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ + +#else /* !__ASSEMBLY */ + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ + } while(0) + +#define ptrauth_save_keys(ctxt) \ + do { \ + __ptrauth_save_key(ctxt, APIA); \ + __ptrauth_save_key(ctxt, APIB); \ + __ptrauth_save_key(ctxt, APDA); \ + __ptrauth_save_key(ctxt, APDB); \ + __ptrauth_save_key(ctxt, APGA); \ + } while(0) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h new file mode 100644 index 000000000000..f8f78f622dd2 --- /dev/null +++ b/arch/arm64/include/asm/mem_encrypt.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_MEM_ENCRYPT_H +#define __ASM_MEM_ENCRYPT_H + +#include <asm/rsi.h> + +struct arm64_mem_crypt_ops { + int (*encrypt)(unsigned long addr, int numpages); + int (*decrypt)(unsigned long addr, int numpages); +}; + +int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops); + +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + +int realm_register_memory_enc_ops(void); + +static inline bool force_dma_unencrypted(struct device *dev) +{ + return is_realm_world(); +} + +#endif /* __ASM_MEM_ENCRYPT_H */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index d82305ab420f..717829df294e 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -30,8 +30,8 @@ * keep a constant PAGE_OFFSET and "fallback" to using the higher end * of the VMEMMAP where 52-bit support is not available in hardware. */ -#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT) +#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) +#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page)) /* * PAGE_OFFSET - the virtual address of the start of the linear map, at the @@ -47,14 +47,18 @@ #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) #define MODULES_VSIZE (SZ_2G) -#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) -#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) -#define PCI_IO_END (VMEMMAP_START - SZ_8M) -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) -#define FIXADDR_TOP (VMEMMAP_START - SZ_32M) +#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE) +#define VMEMMAP_END (-UL(SZ_1G)) +#define PCI_IO_START (VMEMMAP_END + SZ_8M) +#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE) +#define FIXADDR_TOP (-UL(SZ_8M)) #if VA_BITS > 48 +#ifdef CONFIG_ARM64_16K_PAGES +#define VA_BITS_MIN (47) +#else #define VA_BITS_MIN (48) +#endif #else #define VA_BITS_MIN (VA_BITS) #endif @@ -106,6 +110,8 @@ #define PAGE_END (_PAGE_END(VA_BITS_MIN)) #endif /* CONFIG_KASAN */ +#define DIRECT_MAP_PHYSMEM_END __pa(PAGE_END - 1) + #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) /* @@ -139,13 +145,16 @@ #define OVERFLOW_STACK_SIZE SZ_4K +#define NVHE_STACK_SHIFT PAGE_SHIFT +#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT) + /* * With the minimum frame size of [x29, x30], exactly half the combined * sizes of the hyp and overflow stacks is the maximum size needed to * save the unwinded stacktrace; plus an additional entry to delimit the * end. */ -#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long)) +#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long)) /* * Alignment of kernel segments (e.g. .text, .data). @@ -173,6 +182,7 @@ * Memory types for Stage-2 translation */ #define MT_S2_NORMAL 0xf +#define MT_S2_NORMAL_NC 0x5 #define MT_S2_DEVICE_nGnRE 0x1 /* @@ -180,6 +190,7 @@ * Stage-2 enforces Normal-WB and Device-nGnRE */ #define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_NORMAL_NC 5 #define MT_S2_FWB_DEVICE_nGnRE 1 #ifdef CONFIG_ARM64_4K_PAGES @@ -209,9 +220,20 @@ #include <asm/boot.h> #include <asm/bug.h> #include <asm/sections.h> +#include <asm/sysreg.h> + +static inline u64 __pure read_tcr(void) +{ + u64 tcr; + + // read_sysreg() uses asm volatile, so avoid it here + asm("mrs %0, tcr_el1" : "=r"(tcr)); + return tcr; +} #if VA_BITS > 48 -extern u64 vabits_actual; +// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here +#define vabits_actual (64 - ((read_tcr() >> 16) & 63)) #else #define vabits_actual ((u64)VA_BITS) #endif @@ -335,12 +357,6 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* - * Convert a page to/from a physical address - */ -#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) -#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) - -/* * Note: Drivers should NOT use these. They are the wrong * translation for translating DMA addresses. Use the driver * DMA support - see dma-mapping.h. diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 5966ee4a6154..21df8bbd2668 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -2,12 +2,17 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ +#include <uapi/asm/mman.h> + +#ifndef BUILD_VDSO #include <linux/compiler.h> +#include <linux/fs.h> +#include <linux/hugetlb.h> +#include <linux/shmem_fs.h> #include <linux/types.h> -#include <uapi/asm/mman.h> static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, - unsigned long pkey __always_unused) + unsigned long pkey) { unsigned long ret = 0; @@ -17,23 +22,36 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, if (system_supports_mte() && (prot & PROT_MTE)) ret |= VM_MTE; +#ifdef CONFIG_ARCH_HAS_PKEYS + if (system_supports_poe()) { + ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0; + ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0; + ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0; + } +#endif + return ret; } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +static inline unsigned long arch_calc_vm_flag_bits(struct file *file, + unsigned long flags) { /* * Only allow MTE on anonymous mappings as these are guaranteed to be * backed by tags-capable memory. The vm_flags may be overridden by a * filesystem supporting MTE (RAM-based). */ - if (system_supports_mte() && (flags & MAP_ANONYMOUS)) - return VM_MTE_ALLOWED; + if (system_supports_mte()) { + if (flags & (MAP_ANONYMOUS | MAP_HUGETLB)) + return VM_MTE_ALLOWED; + if (shmem_file(file) || is_file_hugepages(file)) + return VM_MTE_ALLOWED; + } return 0; } -#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) +#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) static inline bool arch_validate_prot(unsigned long prot, unsigned long addr __always_unused) @@ -52,12 +70,29 @@ static inline bool arch_validate_prot(unsigned long prot, static inline bool arch_validate_flags(unsigned long vm_flags) { - if (!system_supports_mte()) - return true; + if (system_supports_mte()) { + /* + * only allow VM_MTE if VM_MTE_ALLOWED has been set + * previously + */ + if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED)) + return false; + } + + if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { + /* An executable GCS isn't a good idea. */ + if (vm_flags & VM_EXEC) + return false; + + /* The memory management core should prevent this */ + VM_WARN_ON(vm_flags & VM_SHARED); + } + + return true; - /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ - return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); } #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) +#endif /* !BUILD_VDSO */ + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 2fcf51231d6e..662471cfc536 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -25,6 +25,7 @@ typedef struct { refcount_t pinned; void *vdso; unsigned long flags; + u8 pkey_allocation_map; } mm_context_t; /* @@ -63,7 +64,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void) extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); -extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -71,10 +71,43 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); -extern bool kaslr_requires_kpti(void); -#define INIT_MM_CONTEXT(name) \ - .pgd = init_pg_dir, +/* + * This check is triggered during the early boot before the cpufeature + * is initialised. Checking the status on the local CPU allows the boot + * CPU to detect the need for non-global mappings and thus avoiding a + * pagetable re-write after all the CPUs are booted. This check will be + * anyway run on individual CPUs, allowing us to get the consistent + * state once the SMP CPUs are up and thus make the switch to non-global + * mappings if required. + */ +static inline bool kaslr_requires_kpti(void) +{ + /* + * E0PD does a similar job to KPTI so can be used instead + * where available. + */ + if (IS_ENABLED(CONFIG_ARM64_E0PD)) { + u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + if (cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_EL1_E0PD_SHIFT)) + return false; + } + + /* + * Systems affected by Cavium erratum 24756 are incompatible + * with KPTI. + */ + if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { + extern const struct midr_range cavium_erratum_27456_cpus[]; + + if (is_midr_in_range_list(read_cpuid_id(), + cavium_erratum_27456_cpus)) + return false; + } + + return true; +} #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 9ce4200508b1..0dbe3b29049b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -15,12 +15,13 @@ #include <linux/sched/hotplug.h> #include <linux/mm_types.h> #include <linux/pgtable.h> +#include <linux/pkeys.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/daifflags.h> +#include <asm/gcs.h> #include <asm/proc-fns.h> -#include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/sysreg.h> #include <asm/tlbflush.h> @@ -61,11 +62,9 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) } /* - * TCR.T0SZ value to use when the ID map is active. Usually equals - * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in - * physical memory, in which case it will be smaller. + * TCR.T0SZ value to use when the ID map is active. */ -extern int idmap_t0sz; +#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS) /* * Ensure TCR.T0SZ is set to the provided value. @@ -74,11 +73,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr = read_sysreg(tcr_el1); - if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz) + if ((tcr & TCR_T0SZ_MASK) == t0sz) return; tcr &= ~TCR_T0SZ_MASK; - tcr |= t0sz << TCR_T0SZ_OFFSET; + tcr |= t0sz; write_sysreg(tcr, tcr_el1); isb(); } @@ -110,18 +109,13 @@ static inline void cpu_uninstall_idmap(void) cpu_switch_mm(mm->pgd, mm); } -static inline void __cpu_install_idmap(pgd_t *idmap) +static inline void cpu_install_idmap(void) { cpu_set_reserved_ttbr0(); local_flush_tlb_all(); cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(lm_alias(idmap), &init_mm); -} - -static inline void cpu_install_idmap(void) -{ - __cpu_install_idmap(idmap_pg_dir); + cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); } /* @@ -148,51 +142,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) isb(); } -/* - * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, - * avoiding the possibility of conflicting TLB entries being allocated. - */ -static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp) -{ - typedef void (ttbr_replace_func)(phys_addr_t); - extern ttbr_replace_func idmap_cpu_replace_ttbr1; - ttbr_replace_func *replace_phys; - unsigned long daif; - - /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ - phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); - - if (cnp) - ttbr1 |= TTBR_CNP_BIT; - - replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); - - __cpu_install_idmap(idmap); - - /* - * We really don't want to take *any* exceptions while TTBR1 is - * in the process of being replaced so mask everything. - */ - daif = local_daif_save(); - replace_phys(ttbr1); - local_daif_restore(daif); - - cpu_uninstall_idmap(); -} +void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp); static inline void cpu_enable_swapper_cnp(void) { - __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true); + __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true); } -static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) +static inline void cpu_replace_ttbr1(pgd_t *pgdp) { /* * Only for early TTBR1 replacement before cpucaps are finalized and * before we've decided whether to use CNP. */ WARN_ON(system_capabilities_finalized()); - __cpu_replace_ttbr1(pgdp, idmap, false); + __cpu_replace_ttbr1(pgdp, false); } /* @@ -212,9 +176,36 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) { atomic64_set(&mm->context.id, 0); refcount_set(&mm->context.pinned, 0); + + /* pkey 0 is the default, so always reserve it. */ + mm->context.pkey_allocation_map = BIT(0); + return 0; } +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; +} + +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +{ + arch_dup_pkeys(oldmm, mm); + + return 0; +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +static inline void arch_unmap(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) @@ -280,18 +271,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, } static inline const struct cpumask * -task_cpu_possible_mask(struct task_struct *p) +__task_cpu_possible_mask(struct task_struct *p, const struct cpumask *mask) { if (!static_branch_unlikely(&arm64_mismatched_32bit_el0)) - return cpu_possible_mask; + return mask; if (!is_compat_thread(task_thread_info(p))) - return cpu_possible_mask; + return mask; return system_32bit_el0_cpumask(); } + +static inline const struct cpumask * +task_cpu_possible_mask(struct task_struct *p) +{ + return __task_cpu_possible_mask(p, cpu_possible_mask); +} #define task_cpu_possible_mask task_cpu_possible_mask +const struct cpumask *task_cpu_fallback_mask(struct task_struct *p); + void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); @@ -304,6 +303,31 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) return -1UL >> 8; } +/* + * Only enforce protection keys on the current process, because there is no + * user context to access POR_EL0 for another address space. + */ +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, bool foreign) +{ + if (!system_supports_poe()) + return true; + + /* allow access if the VMA is not one from this process */ + if (foreign || vma_is_foreign(vma)) + return true; + + return por_el0_allows_pkey(vma_pkey(vma), write, execute); +} + +#define deactivate_mm deactivate_mm +static inline void deactivate_mm(struct task_struct *tsk, + struct mm_struct *mm) +{ + gcs_free(tsk); +} + + #include <asm-generic/mmu_context.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h deleted file mode 100644 index fa17e01d9ab2..000000000000 --- a/arch/arm64/include/asm/mmzone.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MMZONE_H -#define __ASM_MMZONE_H - -#ifdef CONFIG_NUMA - -#include <asm/numa.h> - -extern struct pglist_data *node_data[]; -#define NODE_DATA(nid) (node_data[(nid)]) - -#endif /* CONFIG_NUMA */ -#endif /* __ASM_MMZONE_H */ diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h index 20070a847304..2e2f83bafcfb 100644 --- a/arch/arm64/include/asm/mshyperv.h +++ b/arch/arm64/include/asm/mshyperv.h @@ -6,9 +6,8 @@ * the ARM64 architecture. See include/asm-generic/mshyperv.h for * definitions are that architecture independent. * - * Definitions that are specified in the Hyper-V Top Level Functional - * Spec (TLFS) should not go in this file, but should instead go in - * hyperv-tlfs.h. + * Definitions that are derived from Hyper-V code or headers should not go in + * this file, but should instead go in the relevant files in include/hyperv. * * Copyright (C) 2021, Microsoft, Inc. * @@ -20,7 +19,7 @@ #include <linux/types.h> #include <linux/arm-smccc.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> /* * Declare calls to get and set Hyper-V VP register values on ARM64, which @@ -31,12 +30,12 @@ void hv_set_vpreg(u32 reg, u64 value); u64 hv_get_vpreg(u32 reg); void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result); -static inline void hv_set_register(unsigned int reg, u64 value) +static inline void hv_set_msr(unsigned int reg, u64 value) { hv_set_vpreg(reg, value); } -static inline u64 hv_get_register(unsigned int reg) +static inline u64 hv_get_msr(unsigned int reg) { return hv_get_vpreg(reg); } diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 91fbd5c8a391..6567df8ec8ca 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -41,6 +41,8 @@ void mte_free_tag_storage(char *storage); static inline void set_page_mte_tagged(struct page *page) { + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + /* * Ensure that the tags written prior to this function are visible * before the page flags update. @@ -53,6 +55,8 @@ static inline bool page_mte_tagged(struct page *page) { bool ret = test_bit(PG_mte_tagged, &page->flags); + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + /* * If the page is tagged, ensure ordering with a likely subsequent * read of the tags. @@ -76,6 +80,8 @@ static inline bool page_mte_tagged(struct page *page) */ static inline bool try_page_mte_tagging(struct page *page) { + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + if (!test_and_set_bit(PG_mte_lock, &page->flags)) return true; @@ -157,6 +163,67 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_ARM64_MTE) +static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) +{ + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + /* + * Ensure that the tags written prior to this function are visible + * before the folio flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &folio->flags); + +} + +static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio) +{ + bool ret = test_bit(PG_mte_tagged, &folio->flags); + + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + /* + * If the folio is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + +static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio) +{ + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + if (!test_and_set_bit(PG_mte_lock, &folio->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&folio->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} +#else +static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) +{ +} + +static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio) +{ + return false; +} + +static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio) +{ + return false; +} +#endif + static inline void mte_disable_tco_entry(struct task_struct *task) { if (!system_supports_mte()) @@ -182,7 +249,7 @@ void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; mte_check_tfsr_el1(); @@ -190,7 +257,7 @@ static inline void mte_check_tfsr_entry(void) static inline void mte_check_tfsr_exit(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; /* diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbf..d402e08442ee 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -10,9 +10,6 @@ #include <linux/const.h> -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include <vdso/page.h> #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index eb7071c9eb34..ee45b4e77347 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -10,10 +10,6 @@ #include <asm/ptrace.h> #ifdef CONFIG_PERF_EVENTS -struct pt_regs; -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); -extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs #endif diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 237224484d0f..1b4509d3382c 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -14,6 +14,7 @@ #include <asm/tlbflush.h> #define __HAVE_ARCH_PGD_FREE +#define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) @@ -27,7 +28,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - pudval_t pudval = PUD_TYPE_TABLE; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF; pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; __pud_populate(pudp, __pa(pmdp), pudval); @@ -43,16 +44,24 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { - set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); + if (pgtable_l4_enabled()) + set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); } static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - p4dval_t p4dval = P4D_TYPE_TABLE; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF; p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; __p4d_populate(p4dp, __pa(pudp), p4dval); } + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!pgtable_l4_enabled()) + return; + __pud_free(mm, pud); +} #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { @@ -60,6 +69,29 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#if CONFIG_PGTABLE_LEVELS > 4 + +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot) +{ + if (pgtable_l5_enabled()) + set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot)); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp) +{ + pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_AF; + + pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN; + __pgd_populate(pgdp, __pa(p4dp), pgdval); +} + +#else +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot) +{ + BUILD_BUG(); +} +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ + extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); @@ -77,14 +109,16 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { VM_BUG_ON(mm && mm != &init_mm); - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); + __pmd_populate(pmdp, __pa(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { VM_BUG_ON(mm == &init_mm); - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); + __pmd_populate(pmdp, page_to_phys(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN); } #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e4944d517c99..a9136cc551cc 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -26,10 +26,10 @@ #define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) /* - * Size mapped by an entry at level n ( 0 <= n <= 3) + * Size mapped by an entry at level n ( -1 <= n <= 3) * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits * in the final page. The maximum number of translation levels supported by - * the architecture is 4. Hence, starting at level n, we have further + * the architecture is 5. Hence, starting at level n, we have further * ((4 - n) - 1) levels of translation excluding the offset within the page. * So, the total number of bits mapped by an entry at level n is : * @@ -62,9 +62,16 @@ #define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3)) #endif +#if CONFIG_PGTABLE_LEVELS > 4 +#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0) +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) +#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3)) +#endif + /* * PGDIR_SHIFT determines the size a top-level page table entry can map - * (depending on the configuration, this level can be 0, 1 or 2). + * (depending on the configuration, this level can be -1, 0, 1 or 2). */ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) @@ -87,6 +94,16 @@ /* * Hardware page table definitions. * + * Level -1 descriptor (PGD). + */ +#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0) +#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1) +#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0) +#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ +#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59) +#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60) + +/* * Level 0 descriptor (P4D). */ #define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) @@ -94,6 +111,7 @@ #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) #define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) @@ -105,6 +123,7 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) #define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) @@ -115,11 +134,11 @@ #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) +#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* * Section */ -#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) @@ -154,14 +173,19 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55))) -#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) +#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 +#ifdef CONFIG_ARM64_64K_PAGES #define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) -#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) #define PTE_ADDR_HIGH_SHIFT 36 +#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) #else -#define PTE_ADDR_MASK PTE_ADDR_LOW +#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8) +#define PTE_ADDR_HIGH_SHIFT 42 +#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8) +#endif #endif /* @@ -179,15 +203,24 @@ #define PTE_PI_IDX_3 54 /* UXN */ /* + * POIndex[2:0] encoding (Permission Overlay Extension) + */ +#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60) +#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61) +#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62) + +#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60) + + +/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) /* - * Highest possible physical address supported. + * Hierarchical permission for Stage-1 tables */ -#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) -#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61) #define TTBR_CNP_BIT (UL(1) << 0) @@ -277,6 +310,11 @@ #define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_HPD0_SHIFT 41 +#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT) +#define TCR_HPD1_SHIFT 42 +#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT) +#define TCR_TBID0 (UL(1) << 51) #define TCR_TBID1 (UL(1) << 52) #define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) @@ -284,6 +322,7 @@ #define TCR_E0PD1 (UL(1) << 56) #define TCR_TCMA0 (UL(1) << 57) #define TCR_TCMA1 (UL(1) << 58) +#define TCR_DS (UL(1) << 59) /* * TTBR. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 483dbfa39c4c..a95f1f77bb39 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -18,20 +18,26 @@ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* - * This bit indicates that the entry is present i.e. pmd_page() - * still points to a valid huge page in memory even if the pmd - * has been invalidated. + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be + * interpreted according to the HW layout by SW but any attempted HW access to + * the address will result in a fault. pte_present() returns true. */ -#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define PTE_PRESENT_INVALID (PTE_NG) /* only when !PTE_VALID */ + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ +#else +#define PTE_UFFD_WP (_AT(pteval_t, 0)) +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF) #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -57,21 +63,40 @@ #define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) #define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) -#ifdef __ASSEMBLY__ -#define PTE_MAYBE_NG 0 -#endif - #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> #include <asm/pgtable-types.h> +#include <asm/rsi.h> extern bool arm64_use_ng_mappings; +extern unsigned long prot_ns_shared; + +#define PROT_NS_SHARED (is_realm_world() ? prot_ns_shared : 0) #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +#ifndef CONFIG_ARM64_LPA2 #define lpa2_is_enabled() false +#define PTE_MAYBE_SHARED PTE_SHARED +#define PMD_MAYBE_SHARED PMD_SECT_S +#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) +#else +static inline bool __pure lpa2_is_enabled(void) +{ + return read_tcr() & TCR_DS; +} + +#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED) +#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S) +#define PHYS_MASK_SHIFT (lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48) +#endif + +/* + * Highest possible physical address supported. + */ +#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) /* * If we have userspace only BTI we don't want to mark kernel pages @@ -95,7 +120,7 @@ extern bool arm64_use_ng_mappings; __val; \ }) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_SHARED) #define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) @@ -129,22 +154,32 @@ extern bool arm64_use_ng_mappings; /* 6: PTE_PXN | PTE_WRITE */ /* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */ /* 8: PAGE_KERNEL_ROX PTE_UXN */ -/* 9: PTE_UXN | PTE_USER */ +/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */ /* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */ -/* b: PTE_UXN | PTE_WRITE | PTE_USER */ +/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */ /* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */ /* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */ /* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */ /* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */ +#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) +#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) + +#define PAGE_GCS __pgprot(_PAGE_GCS) +#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) + #define PIE_E0 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW)) + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O)) #define PIE_E1 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index b8f158ae2527..6d6d4065b0cb 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -36,6 +36,12 @@ typedef struct { pudval_t pud; } pud_t; #define __pud(x) ((pud_t) { (x) } ) #endif +#if CONFIG_PGTABLE_LEVELS > 4 +typedef struct { p4dval_t p4d; } p4d_t; +#define p4d_val(x) ((x).p4d) +#define __p4d(x) ((p4d_t) { (x) } ) +#endif + typedef struct { pgdval_t pgd; } pgd_t; #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) } ) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 79ce70fbb751..0b2a2ad1b9e8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -18,11 +18,15 @@ * VMALLOC range. * * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space - * and fixed mappings + * VMALLOC_END: extends to the available space below vmemmap */ #define VMALLOC_START (MODULES_END) -#define VMALLOC_END (VMEMMAP_START - SZ_256M) +#if VA_BITS == VA_BITS_MIN +#define VMALLOC_END (VMEMMAP_START - SZ_8M) +#else +#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT) +#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M) +#endif #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) @@ -30,6 +34,7 @@ #include <asm/cmpxchg.h> #include <asm/fixmap.h> +#include <asm/por.h> #include <linux/mmdebug.h> #include <linux/mm_types.h> #include <linux/sched.h> @@ -45,12 +50,6 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline bool arch_thp_swp_supported(void) -{ - return !system_supports_mte(); -} -#define arch_thp_swp_supported arch_thp_swp_supported - /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page @@ -76,15 +75,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_ARM64_PA_BITS_52 static inline phys_addr_t __pte_to_phys(pte_t pte) { + pte_val(pte) &= ~PTE_MAYBE_SHARED; return (pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT); } static inline pteval_t __phys_to_pte_val(phys_addr_t phys) { - return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK; + return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK; } #else -#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) +#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW) #define __phys_to_pte_val(phys) (phys) #endif @@ -93,13 +93,14 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pte_none(pte) (!pte_val(pte)) -#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) +#define __pte_clear(mm, addr, ptep) \ + __set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) @@ -126,6 +127,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +#define pte_present_invalid(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID) /* * Execute-only user mappings do not have the PTE_USER bit set. All valid * kernel mappings have the PTE_UXN bit set. @@ -133,16 +136,38 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* + * Returns true if the pte is valid and has the contiguous bit set. + */ +#define pte_valid_cont(pte) (pte_valid(pte) && pte_cont(pte)) +/* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. * Note that we can't make any assumptions based on the state of the access - * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the * TLB. */ #define pte_accessible(mm, pte) \ (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) +static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute) +{ + u64 por; + + if (!system_supports_poe()) + return true; + + por = read_sysreg_s(SYS_POR_EL0); + + if (write) + return por_elx_allows_write(por, pkey); + + if (execute) + return por_elx_allows_exec(por, pkey); + + return por_elx_allows_read(por, pkey); +} + /* * p??_access_permitted() is true for valid user mappings (PTE_USER * bit set, subject to the write permission check). For execute-only @@ -150,8 +175,11 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) * not set) must return false. PROT_NONE mappings do not have the * PTE_VALID bit set. */ -#define pte_access_permitted(pte, write) \ +#define pte_access_permitted_no_overlay(pte, write) \ (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) +#define pte_access_permitted(pte, write) \ + (pte_access_permitted_no_overlay(pte, write) && \ + por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false)) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -237,8 +265,7 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - pte = set_pte_bit(pte, __pgprot(PTE_CONT)); - return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); + return set_pte_bit(pte, __pgprot(PTE_CONT)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -246,11 +273,18 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } -static inline pte_t pte_mkpresent(pte_t pte) +static inline pte_t pte_mkvalid(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_VALID)); } +static inline pte_t pte_mkinvalid(pte_t pte) +{ + pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID)); + pte = clear_pte_bit(pte, __pgprot(PTE_VALID)); + return pte; +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); @@ -261,9 +295,31 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } -static inline void set_pte(pte_t *ptep, pte_t pte) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_UFFD_WP); +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP))); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); +} + +static inline void __set_pte(pte_t *ptep, pte_t pte) +{ + __set_pte_nosync(ptep, pte); /* * Only if the new pte is valid and kernel, otherwise TLB maintenance @@ -275,8 +331,13 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } +static inline pte_t __ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} + extern void __sync_icache_dcache(pte_t pteval); -bool pgattr_change_is_safe(u64 old, u64 new); +bool pgattr_change_is_safe(pteval_t old, pteval_t new); /* * PTE bits configuration in the presence of hardware Dirty Bit Management @@ -302,7 +363,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep, if (!IS_ENABLED(CONFIG_DEBUG_VM)) return; - old_pte = READ_ONCE(*ptep); + old_pte = __ptep_get(ptep); if (!pte_valid(old_pte) || !pte_valid(pte)) return; @@ -311,7 +372,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep, /* * Check for potential race with hardware updates of the pte - * (ptep_set_access_flags safely changes valid ptes without going + * (__ptep_set_access_flags safely changes valid ptes without going * through an invalid entry). */ VM_WARN_ONCE(!pte_young(pte), @@ -333,36 +394,48 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) /* * If the PTE would provide user space access to the tags associated * with it then ensure that the MTE tags are synchronised. Although - * pte_access_permitted() returns false for exec only mappings, they - * don't expose tags (instruction fetches don't check tags). + * pte_access_permitted_no_overlay() returns false for exec only + * mappings, they don't expose tags (instruction fetches don't check + * tags). */ - if (system_supports_mte() && pte_access_permitted(pte, false) && + if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) && !pte_special(pte) && pte_tagged(pte)) mte_sync_tags(pte, nr_pages); } -static inline void set_ptes(struct mm_struct *mm, - unsigned long __always_unused addr, - pte_t *ptep, pte_t pte, unsigned int nr) +/* + * Select all bits except the pfn + */ +#define pte_pgprot pte_pgprot +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + +#define pte_advance_pfn pte_advance_pfn +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) +{ + return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte)); +} + +static inline void __set_ptes(struct mm_struct *mm, + unsigned long __always_unused addr, + pte_t *ptep, pte_t pte, unsigned int nr) { page_table_check_ptes_set(mm, ptep, pte, nr); __sync_cache_and_tags(pte, nr); for (;;) { __check_safe_pte_update(mm, ptep, pte); - set_pte(ptep, pte); + __set_pte(ptep, pte); if (--nr == 0) break; ptep++; - pte_val(pte) += PAGE_SIZE; + pte = pte_advance_pfn(pte, 1); } } -#define set_ptes set_ptes - -/* - * Huge pte definitions. - */ -#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) /* * Hugetlb definitions. @@ -433,15 +506,22 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) { - unsigned long pfn = pte_pfn(pte); + return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} - return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_SWP_UFFD_WP); +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); } +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #ifdef CONFIG_NUMA_BALANCING /* @@ -449,7 +529,16 @@ static inline pgprot_t pte_pgprot(pte_t pte) */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; + /* + * pte_present_invalid() tells us that the pte is invalid from HW + * perspective but present from SW perspective, so the fields are to be + * interpretted as per the HW layout. The second 2 checks are the unique + * encoding that we use for PROT_NONE. It is insufficient to only use + * the first check because we share the same encoding scheme with pmds + * which support pmd_mkinvalid(), so can be present-invalid without + * being PROT_NONE. + */ + return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte); } static inline int pmd_protnone(pmd_t pmd) @@ -458,12 +547,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) - -static inline int pmd_present(pmd_t pmd) -{ - return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); -} +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) /* * THP definitions. @@ -488,16 +572,16 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) - -static inline pmd_t pmd_mkinvalid(pmd_t pmd) -{ - pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); - pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); - - return pmd; -} - -#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) +#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd))) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) +#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))) +#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))) +#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd)) +#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))) +#define pmd_swp_clear_uffd_wp(pmd) \ + pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define pmd_write(pmd) pte_write(pmd_pte(pmd)) @@ -511,6 +595,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); } +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL))) +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL)); +} +#endif + #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) @@ -528,13 +620,34 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +#define pud_special(pte) pte_special(pud_pte(pud)) +#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud))) +#endif + +#define pmd_pgprot pmd_pgprot +static inline pgprot_t pmd_pgprot(pmd_t pmd) +{ + unsigned long pfn = pmd_pfn(pmd); + + return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd)); +} + +#define pud_pgprot pud_pgprot +static inline pgprot_t pud_pgprot(pud_t pud) +{ + unsigned long pfn = pud_pfn(pud); + + return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud)); +} + static inline void __set_pte_at(struct mm_struct *mm, unsigned long __always_unused addr, pte_t *ptep, pte_t pte, unsigned int nr) { __sync_cache_and_tags(pte, nr); __check_safe_pte_update(mm, ptep, pte); - set_pte(ptep, pte); + __set_pte(ptep, pte); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, @@ -565,6 +678,11 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, #define pgprot_nx(prot) \ __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) +#define pgprot_decrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED) +#define pgprot_encrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, 0) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -616,12 +734,12 @@ static inline bool pud_table(pud_t pud) { return true; } PUD_TYPE_TABLE) #endif -extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_dir[]; extern pgd_t init_pg_end[]; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; -extern pgd_t reserved_pg_dir[PTRS_PER_PGD]; +extern pgd_t swapper_pg_dir[]; +extern pgd_t idmap_pg_dir[]; +extern pgd_t tramp_pg_dir[]; +extern pgd_t reserved_pg_dir[]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -689,19 +807,23 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) +#ifndef __PAGETABLE_PMD_FOLDED #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) +#else +#define pud_leaf(pud) false +#endif #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) #define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) +static inline bool pgtable_l4_enabled(void); + static inline void set_pud(pud_t *pudp, pud_t pud) { -#ifdef __PAGETABLE_PUD_FOLDED - if (in_swapper_pgdir(pudp)) { + if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) { set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); return; } -#endif /* __PAGETABLE_PUD_FOLDED */ WRITE_ONCE(*pudp, pud); @@ -740,6 +862,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #else +#define pud_valid(pud) false #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) #define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ @@ -754,12 +877,27 @@ static inline pmd_t *pud_pgtable(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 +static __always_inline bool pgtable_l4_enabled(void) +{ + if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2)) + return true; + if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) + return vabits_actual == VA_BITS; + return alternative_has_cap_unlikely(ARM64_HAS_VA52); +} + +static inline bool mm_pud_folded(const struct mm_struct *mm) +{ + return !pgtable_l4_enabled(); +} +#define mm_pud_folded mm_pud_folded + #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) -#define p4d_none(p4d) (!p4d_val(p4d)) -#define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) -#define p4d_present(p4d) (p4d_val(p4d)) +#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d)) +#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & P4D_TABLE_BIT)) +#define p4d_present(p4d) (!p4d_none(p4d)) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { @@ -775,7 +913,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) static inline void p4d_clear(p4d_t *p4dp) { - set_p4d(p4dp, __p4d(0)); + if (pgtable_l4_enabled()) + set_p4d(p4dp, __p4d(0)); } static inline phys_addr_t p4d_page_paddr(p4d_t p4d) @@ -783,27 +922,78 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d) return __p4d_to_phys(p4d); } +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr) +{ + /* Ensure that 'p4dp' indexes a page table according to 'addr' */ + VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D); + + return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr); +} + static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)__va(p4d_page_paddr(p4d)); } -/* Find an entry in the first-level page table. */ -#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) +static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr) +{ + BUG_ON(!pgtable_l4_enabled()); + + return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t); +} + +static inline +pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr) +{ + if (!pgtable_l4_enabled()) + return p4d_to_folded_pud(p4dp, addr); + return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr); +} +#define pud_offset_lockless pud_offset_lockless -#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) -#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) -#define pud_clear_fixmap() clear_fixmap(FIX_PUD) +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr) +{ + return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr); +} +#define pud_offset pud_offset -#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) +static inline pud_t *pud_set_fixmap(unsigned long addr) +{ + if (!pgtable_l4_enabled()) + return NULL; + return (pud_t *)set_fixmap_offset(FIX_PUD, addr); +} + +static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr) +{ + if (!pgtable_l4_enabled()) + return p4d_to_folded_pud(p4dp, addr); + return pud_set_fixmap(pud_offset_phys(p4dp, addr)); +} + +static inline void pud_clear_fixmap(void) +{ + if (pgtable_l4_enabled()) + clear_fixmap(FIX_PUD); +} /* use ONLY for statically allocated translation tables */ -#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) +static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr) +{ + if (!pgtable_l4_enabled()) + return p4d_to_folded_pud(p4dp, addr); + return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr)); +} + +#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) #else +static inline bool pgtable_l4_enabled(void) { return false; } + #define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;}) -#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ #define pud_set_fixmap(addr) NULL @@ -814,6 +1004,149 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#if CONFIG_PGTABLE_LEVELS > 4 + +static __always_inline bool pgtable_l5_enabled(void) +{ + if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) + return vabits_actual == VA_BITS; + return alternative_has_cap_unlikely(ARM64_HAS_VA52); +} + +static inline bool mm_p4d_folded(const struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} +#define mm_p4d_folded mm_p4d_folded + +#define p4d_ERROR(e) \ + pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e)) + +#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd)) +#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & PGD_TABLE_BIT)) +#define pgd_present(pgd) (!pgd_none(pgd)) + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, __pgd(pgd_val(pgd))); + return; + } + + WRITE_ONCE(*pgdp, pgd); + dsb(ishst); + isb(); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ + if (pgtable_l5_enabled()) + set_pgd(pgdp, __pgd(0)); +} + +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) +{ + return __pgd_to_phys(pgd); +} + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr) +{ + /* Ensure that 'pgdp' indexes a page table according to 'addr' */ + VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD); + + return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr); +} + +static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr) +{ + BUG_ON(!pgtable_l5_enabled()); + + return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t); +} + +static inline +p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + if (!pgtable_l5_enabled()) + return pgd_to_folded_p4d(pgdp, addr); + return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr); +} +#define p4d_offset_lockless p4d_offset_lockless + +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr) +{ + return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr); +} + +static inline p4d_t *p4d_set_fixmap(unsigned long addr) +{ + if (!pgtable_l5_enabled()) + return NULL; + return (p4d_t *)set_fixmap_offset(FIX_P4D, addr); +} + +static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr) +{ + if (!pgtable_l5_enabled()) + return pgd_to_folded_p4d(pgdp, addr); + return p4d_set_fixmap(p4d_offset_phys(pgdp, addr)); +} + +static inline void p4d_clear_fixmap(void) +{ + if (pgtable_l5_enabled()) + clear_fixmap(FIX_P4D); +} + +/* use ONLY for statically allocated translation tables */ +static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) +{ + if (!pgtable_l5_enabled()) + return pgd_to_folded_p4d(pgdp, addr); + return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr)); +} + +#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) + +#else + +static inline bool pgtable_l5_enabled(void) { return false; } + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ +#define p4d_set_fixmap(addr) NULL +#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) +#define p4d_clear_fixmap() + +#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir) + +static inline +p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + /* + * With runtime folding of the pud, pud_offset_lockless() passes + * the 'pgd_t *' we return here to p4d_to_folded_pud(), which + * will offset the pointer assuming that it points into + * a page-table page. However, the fast GUP path passes us a + * pgd_t allocated on the stack and so we must use the original + * pointer in 'pgdp' to construct the p4d pointer instead of + * using the generic p4d_offset_lockless() implementation. + * + * Note: reusing the original pointer means that we may + * dereference the same (live) page-table entry multiple times. + * This is safe because it is still only loaded once in the + * context of each level and the CPU guarantees same-address + * read-after-read ordering. + */ + return p4d_offset(pgdp, addr); +} +#define p4d_offset_lockless p4d_offset_lockless_folded + +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ + #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) @@ -827,8 +1160,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | - PTE_ATTRINDX_MASK; + PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | + PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK; + /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); @@ -848,8 +1182,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, +extern int __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); @@ -859,7 +1192,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { - return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); + return __ptep_set_access_flags(vma, address, (pte_t *)pmdp, + pmd_pte(entry), dirty); } static inline int pud_devmap(pud_t pud) @@ -876,29 +1210,30 @@ static inline int pgd_devmap(pgd_t pgd) #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { - return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); + return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); } static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); + return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif /* * Atomic pte/pmd modifications. */ -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) { pte_t old_pte, pte; - pte = READ_ONCE(*ptep); + pte = __ptep_get(ptep); do { old_pte = pte; pte = pte_mkold(pte); @@ -909,18 +1244,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep) return pte_young(pte); } -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) -{ - return __ptep_test_and_clear_young(ptep); -} - -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -static inline int ptep_clear_flush_young(struct vm_area_struct *vma, +static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - int young = ptep_test_and_clear_young(vma, address, ptep); + int young = __ptep_test_and_clear_young(vma, address, ptep); if (young) { /* @@ -937,18 +1264,19 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return young; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); + /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ + VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); + return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, +static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); @@ -958,6 +1286,37 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return pte; } +static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + __ptep_get_and_clear(mm, addr, ptep); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} + +static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned int nr, int full) +{ + pte_t pte, tmp_pte; + + pte = __ptep_get_and_clear(mm, addr, ptep); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = __ptep_get_and_clear(mm, addr, ptep); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, @@ -971,30 +1330,85 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline void ___ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep, + pte_t pte) +{ + pte_t old_pte; + + do { + old_pte = pte; + pte = pte_wrprotect(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); + } while (pte_val(pte) != pte_val(old_pte)); +} + /* - * ptep_set_wrprotect - mark read-only while trasferring potential hardware + * __ptep_set_wrprotect - mark read-only while transferring potential hardware * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. */ -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) +static inline void __ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) { - pte_t old_pte, pte; + ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep)); +} + +static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address, + pte_t *ptep, unsigned int nr) +{ + unsigned int i; + + for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++) + __ptep_set_wrprotect(mm, address, ptep); +} + +static inline void __clear_young_dirty_pte(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, cydp_t flags) +{ + pte_t old_pte; - pte = READ_ONCE(*ptep); do { old_pte = pte; - pte = pte_wrprotect(pte); + + if (flags & CYDP_CLEAR_YOUNG) + pte = pte_mkold(pte); + if (flags & CYDP_CLEAR_DIRTY) + pte = pte_mkclean(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); } while (pte_val(pte) != pte_val(old_pte)); } +static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags) +{ + pte_t pte; + + for (;;) { + pte = __ptep_get(ptep); + + if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY)) + __set_pte(ptep, pte_mkclean(pte_mkold(pte))); + else + __clear_young_dirty_pte(vma, addr, ptep, pte, flags); + + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - ptep_set_wrprotect(mm, address, (pte_t *)pmdp); + __ptep_set_wrprotect(mm, address, (pte_t *)pmdp); } #define pmdp_establish pmdp_establish @@ -1010,15 +1424,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * Encode and decode a swap entry: * bits 0-1: present (must be zero) * bits 2: remember PG_anon_exclusive - * bits 3-7: swap type - * bits 8-57: swap offset - * bit 58: PTE_PROT_NONE (must be zero) + * bit 3: remember uffd-wp state + * bits 6-10: swap type + * bit 11: PTE_PRESENT_INVALID (must be zero) + * bits 12-61: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 6 #define __SWP_TYPE_BITS 5 -#define __SWP_OFFSET_BITS 50 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) -#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_OFFSET_BITS 50 #define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) @@ -1042,12 +1457,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #ifdef CONFIG_ARM64_MTE #define __HAVE_ARCH_PREPARE_TO_SWAP -static inline int arch_prepare_to_swap(struct page *page) -{ - if (system_supports_mte()) - return mte_save_tags(page); - return 0; -} +extern int arch_prepare_to_swap(struct folio *folio); #define __HAVE_ARCH_SWAP_INVALIDATE static inline void arch_swap_invalidate_page(int type, pgoff_t offset) @@ -1063,16 +1473,12 @@ static inline void arch_swap_invalidate_area(int type) } #define __HAVE_ARCH_SWAP_RESTORE -static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) -{ - if (system_supports_mte()) - mte_restore_tags(entry, &folio->page); -} +extern void arch_swap_restore(swp_entry_t entry, struct folio *folio); #endif /* CONFIG_ARM64_MTE */ /* - * On AArch64, the cache coherency is handled via the set_pte_at() function. + * On AArch64, the cache coherency is handled via the __set_ptes() function. */ static inline void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, @@ -1103,6 +1509,10 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, */ #define arch_has_hw_pte_young cpu_has_hw_af +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG +#define arch_has_hw_nonleaf_pmd_young system_supports_haft +#endif + /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. @@ -1124,6 +1534,297 @@ extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma, extern void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); + +#ifdef CONFIG_ARM64_CONTPTE + +/* + * The contpte APIs are used to transparently manage the contiguous bit in ptes + * where it is possible and makes sense to do so. The PTE_CONT bit is considered + * a private implementation detail of the public ptep API (see below). + */ +extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte); +extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep); +extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr); +extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full); +extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned int nr, int full); +extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); +extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); +extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr); +extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t entry, int dirty); +extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags); + +static __always_inline void contpte_try_fold(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte) +{ + /* + * Only bother trying if both the virtual and physical addresses are + * aligned and correspond to the last entry in a contig range. The core + * code mostly modifies ranges from low to high, so this is the likely + * the last modification in the contig range, so a good time to fold. + * We can't fold special mappings, because there is no associated folio. + */ + + const unsigned long contmask = CONT_PTES - 1; + bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask; + + if (unlikely(valign)) { + bool palign = (pte_pfn(pte) & contmask) == contmask; + + if (unlikely(palign && + pte_valid(pte) && !pte_cont(pte) && !pte_special(pte))) + __contpte_try_fold(mm, addr, ptep, pte); + } +} + +static __always_inline void contpte_try_unfold(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte) +{ + if (unlikely(pte_valid_cont(pte))) + __contpte_try_unfold(mm, addr, ptep, pte); +} + +#define pte_batch_hint pte_batch_hint +static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) +{ + if (!pte_valid_cont(pte)) + return 1; + + return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1)); +} + +/* + * The below functions constitute the public API that arm64 presents to the + * core-mm to manipulate PTE entries within their page tables (or at least this + * is the subset of the API that arm64 needs to implement). These public + * versions will automatically and transparently apply the contiguous bit where + * it makes sense to do so. Therefore any users that are contig-aware (e.g. + * hugetlb, kernel mapper) should NOT use these APIs, but instead use the + * private versions, which are prefixed with double underscore. All of these + * APIs except for ptep_get_lockless() are expected to be called with the PTL + * held. Although the contiguous bit is considered private to the + * implementation, it is deliberately allowed to leak through the getters (e.g. + * ptep_get()), back to core code. This is required so that pte_leaf_size() can + * provide an accurate size for perf_get_pgtable_size(). But this leakage means + * its possible a pte will be passed to a setter with the contiguous bit set, so + * we explicitly clear the contiguous bit in those cases to prevent accidentally + * setting it in the pgtable. + */ + +#define ptep_get ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_t pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(pte))) + return pte; + + return contpte_ptep_get(ptep, pte); +} + +#define ptep_get_lockless ptep_get_lockless +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + pte_t pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(pte))) + return pte; + + return contpte_ptep_get_lockless(ptep); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + /* + * We don't have the mm or vaddr so cannot unfold contig entries (since + * it requires tlb maintenance). set_pte() is not used in core code, so + * this should never even be called. Regardless do our best to service + * any call and emit a warning if there is any attempt to set a pte on + * top of an existing contig range. + */ + pte_t orig_pte = __ptep_get(ptep); + + WARN_ON_ONCE(pte_valid_cont(orig_pte)); + __set_pte(ptep, pte_mknoncont(pte)); +} + +#define set_ptes set_ptes +static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + pte = pte_mknoncont(pte); + + if (likely(nr == 1)) { + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + __set_ptes(mm, addr, ptep, pte, 1); + contpte_try_fold(mm, addr, ptep, pte); + } else { + contpte_set_ptes(mm, addr, ptep, pte, nr); + } +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + __pte_clear(mm, addr, ptep); +} + +#define clear_full_ptes clear_full_ptes +static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + if (likely(nr == 1)) { + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + __clear_full_ptes(mm, addr, ptep, nr, full); + } else { + contpte_clear_full_ptes(mm, addr, ptep, nr, full); + } +} + +#define get_and_clear_full_ptes get_and_clear_full_ptes +static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned int nr, int full) +{ + pte_t pte; + + if (likely(nr == 1)) { + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full); + } else { + pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full); + } + + return pte; +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + return __ptep_get_and_clear(mm, addr, ptep); +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pte_t orig_pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(orig_pte))) + return __ptep_test_and_clear_young(vma, addr, ptep); + + return contpte_ptep_test_and_clear_young(vma, addr, ptep); +} + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pte_t orig_pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(orig_pte))) + return __ptep_clear_flush_young(vma, addr, ptep); + + return contpte_ptep_clear_flush_young(vma, addr, ptep); +} + +#define wrprotect_ptes wrprotect_ptes +static __always_inline void wrprotect_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + if (likely(nr == 1)) { + /* + * Optimization: wrprotect_ptes() can only be called for present + * ptes so we only need to check contig bit as condition for + * unfold, and we can remove the contig bit from the pte we read + * to avoid re-reading. This speeds up fork() which is sensitive + * for order-0 folios. Equivalent to contpte_try_unfold(). + */ + pte_t orig_pte = __ptep_get(ptep); + + if (unlikely(pte_cont(orig_pte))) { + __contpte_try_unfold(mm, addr, ptep, orig_pte); + orig_pte = pte_mknoncont(orig_pte); + } + ___ptep_set_wrprotect(mm, addr, ptep, orig_pte); + } else { + contpte_wrprotect_ptes(mm, addr, ptep, nr); + } +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + wrprotect_ptes(mm, addr, ptep, 1); +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t entry, int dirty) +{ + pte_t orig_pte = __ptep_get(ptep); + + entry = pte_mknoncont(entry); + + if (likely(!pte_valid_cont(orig_pte))) + return __ptep_set_access_flags(vma, addr, ptep, entry, dirty); + + return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty); +} + +#define clear_young_dirty_ptes clear_young_dirty_ptes +static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags) +{ + if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) + __clear_young_dirty_ptes(vma, addr, ptep, nr, flags); + else + contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags); +} + +#else /* CONFIG_ARM64_CONTPTE */ + +#define ptep_get __ptep_get +#define set_pte __set_pte +#define set_ptes __set_ptes +#define pte_clear __pte_clear +#define clear_full_ptes __clear_full_ptes +#define get_and_clear_full_ptes __get_and_clear_full_ptes +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear __ptep_get_and_clear +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young __ptep_test_and_clear_young +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young __ptep_clear_flush_young +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define ptep_set_wrprotect __ptep_set_wrprotect +#define wrprotect_ptes __wrprotect_ptes +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags __ptep_set_access_flags +#define clear_young_dirty_ptes __clear_young_dirty_ptes + +#endif /* CONFIG_ARM64_CONTPTE */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/pkeys.h b/arch/arm64/include/asm/pkeys.h new file mode 100644 index 000000000000..0ca5f83ce148 --- /dev/null +++ b/arch/arm64/include/asm/pkeys.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + * + * Based on arch/x86/include/asm/pkeys.h + */ + +#ifndef _ASM_ARM64_PKEYS_H +#define _ASM_ARM64_PKEYS_H + +#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2) + +#define arch_max_pkey() 8 + +int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); + +static inline bool arch_pkeys_enabled(void) +{ + return system_supports_poe(); +} + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; +} + +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (pkey != -1) + return pkey; + + return vma_pkey(vma); +} + +static inline int execute_only_pkey(struct mm_struct *mm) +{ + // Execute-only mappings are handled by EPAN/FEAT_PAN3. + return -1; +} + +#define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map +#define mm_set_pkey_allocated(mm, pkey) do { \ + mm_pkey_allocation_map(mm) |= (1U << pkey); \ +} while (0) +#define mm_set_pkey_free(mm, pkey) do { \ + mm_pkey_allocation_map(mm) &= ~(1U << pkey); \ +} while (0) + +static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. + */ + if (pkey < 0 || pkey >= arch_max_pkey()) + return false; + + return mm_pkey_allocation_map(mm) & (1U << pkey); +} + +/* + * Returns a positive, 3-bit key on success, or -1 on failure. + */ +static inline int mm_pkey_alloc(struct mm_struct *mm) +{ + /* + * Note: this is the one and only place we make sure + * that the pkey is valid as far as the hardware is + * concerned. The rest of the kernel trusts that + * only good, valid pkeys come out of here. + */ + u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0); + int ret; + + if (!arch_pkeys_enabled()) + return -1; + + /* + * Are we out of pkeys? We must handle this specially + * because ffz() behavior is undefined if there are no + * zeros. + */ + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) + return -1; + + ret = ffz(mm_pkey_allocation_map(mm)); + + mm_set_pkey_allocated(mm, ret); + + return ret; +} + +static inline int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + if (!mm_pkey_is_allocated(mm, pkey)) + return -EINVAL; + + mm_set_pkey_free(mm, pkey); + + return 0; +} + +#endif /* _ASM_ARM64_PKEYS_H */ diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h new file mode 100644 index 000000000000..e06e9f473675 --- /dev/null +++ b/arch/arm64/include/asm/por.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + */ + +#ifndef _ASM_ARM64_POR_H +#define _ASM_ARM64_POR_H + +#define POR_BITS_PER_PKEY 4 +#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf) + +static inline bool por_elx_allows_read(u64 por, u8 pkey) +{ + u8 perm = POR_ELx_IDX(por, pkey); + + return perm & POE_R; +} + +static inline bool por_elx_allows_write(u64 por, u8 pkey) +{ + u8 perm = POR_ELx_IDX(por, pkey); + + return perm & POE_W; +} + +static inline bool por_elx_allows_exec(u64 por, u8 pkey) +{ + u8 perm = POR_ELx_IDX(por, pkey); + + return perm & POE_X; +} + +#endif /* _ASM_ARM64_POR_H */ diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 006946745352..d49368886309 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -9,21 +9,18 @@ #include <asm/insn.h> -typedef u32 probe_opcode_t; typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); -/* architecture specific copy of original instruction */ struct arch_probe_insn { - probe_opcode_t *insn; - pstate_check_t *pstate_cc; probes_handler_t *handler; - /* restore address after step xol */ - unsigned long restore; }; #ifdef CONFIG_KPROBES -typedef u32 kprobe_opcode_t; +typedef __le32 kprobe_opcode_t; struct arch_specific_insn { struct arch_probe_insn api; + kprobe_opcode_t *xol_insn; + /* restore address after step xol */ + unsigned long xol_restore; }; #endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5b0a04810b23..1bf1a3b16e88 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -155,6 +155,8 @@ struct thread_struct { struct { unsigned long tp_value; /* TLS register */ unsigned long tp2_value; + u64 fpmr; + unsigned long pad; struct user_fpsimd_state fpsimd_state; } uw; @@ -182,6 +184,14 @@ struct thread_struct { u64 sctlr_user; u64 svcr; u64 tpidr2_el0; + u64 por_el0; +#ifdef CONFIG_ARM64_GCS + unsigned int gcs_el0_mode; + unsigned int gcs_el0_locked; + u64 gcspr_el0; + u64 gcs_base; + u64 gcs_size; +#endif }; static inline unsigned int thread_get_vl(struct thread_struct *thread, @@ -253,6 +263,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) != sizeof_field(struct thread_struct, uw.tp_value) + sizeof_field(struct thread_struct, uw.tp2_value) + + sizeof_field(struct thread_struct, uw.fpmr) + + sizeof_field(struct thread_struct, uw.pad) + sizeof_field(struct thread_struct, uw.fpsimd_state)); *offset = offsetof(struct thread_struct, uw); @@ -280,22 +292,44 @@ void tls_preserve_current_state(void); .fpsimd_cpu = NR_CPUS, \ } -static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) +static inline void start_thread_common(struct pt_regs *regs, unsigned long pc, + unsigned long pstate) { - s32 previous_syscall = regs->syscallno; - memset(regs, 0, sizeof(*regs)); - regs->syscallno = previous_syscall; - regs->pc = pc; + /* + * Ensure all GPRs are zeroed, and initialize PC + PSTATE. + * The SP (or compat SP) will be initialized later. + */ + regs->user_regs = (struct user_pt_regs) { + .pc = pc, + .pstate = pstate, + }; + /* + * To allow the syscalls:sys_exit_execve tracepoint we need to preserve + * syscallno, but do not need orig_x0 or the original GPRs. + */ + regs->orig_x0 = 0; + + /* + * An exec from a kernel thread won't have an existing PMR value. + */ if (system_uses_irq_prio_masking()) - regs->pmr_save = GIC_PRIO_IRQON; + regs->pmr = GIC_PRIO_IRQON; + + /* + * The pt_regs::stackframe field must remain valid throughout this + * function as a stacktrace can be taken at any time. Any user or + * kernel task should have a valid final frame. + */ + WARN_ON_ONCE(regs->stackframe.record.fp != 0); + WARN_ON_ONCE(regs->stackframe.record.lr != 0); + WARN_ON_ONCE(regs->stackframe.type != FRAME_META_TYPE_FINAL); } static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - start_thread_common(regs, pc); - regs->pstate = PSR_MODE_EL0t; + start_thread_common(regs, pc, PSR_MODE_EL0t); spectre_v4_enable_task_mitigation(current); regs->sp = sp; } @@ -304,15 +338,13 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - start_thread_common(regs, pc); - regs->pstate = PSR_AA32_MODE_USR; + unsigned long pstate = PSR_AA32_MODE_USR; if (pc & 1) - regs->pstate |= PSR_AA32_T_BIT; - -#ifdef __AARCH64EB__ - regs->pstate |= PSR_AA32_E_BIT; -#endif + pstate |= PSR_AA32_T_BIT; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + pstate |= PSR_AA32_E_BIT; + start_thread_common(regs, pc, pstate); spectre_v4_enable_task_mitigation(current); regs->compat_sp = sp; } @@ -398,5 +430,10 @@ long get_tagged_addr_ctrl(struct task_struct *task); #define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) #endif +int get_tsc_mode(unsigned long adr); +int set_tsc_mode(unsigned int val); +#define GET_TSC_CTL(adr) get_tsc_mode((adr)) +#define SET_TSC_CTL(val) set_tsc_mode((val)) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 581caac525b0..6cf4aae05219 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,6 +5,8 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H +#include <linux/ptdump.h> + #ifdef CONFIG_PTDUMP_CORE #include <linux/mm_types.h> @@ -21,21 +23,53 @@ struct ptdump_info { unsigned long base_addr; }; +struct ptdump_prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +struct ptdump_pg_level { + const struct ptdump_prot_bits *bits; + char name[4]; + int num; + u64 mask; +}; + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ +struct ptdump_pg_state { + struct ptdump_state ptdump; + struct ptdump_pg_level *pg_level; + struct seq_file *seq; + const struct addr_marker *marker; + const struct mm_struct *mm; + unsigned long start_address; + int level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; + unsigned long uxn_pages; +}; + void ptdump_walk(struct seq_file *s, struct ptdump_info *info); +void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + u64 val); #ifdef CONFIG_PTDUMP_DEBUGFS #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } -#endif -void ptdump_check_wx(void); -#endif /* CONFIG_PTDUMP_CORE */ - -#ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_check_wx() +#endif /* CONFIG_PTDUMP_DEBUGFS */ #else -#define debug_checkwx() do { } while (0) -#endif +static inline void note_page(struct ptdump_state *pt_st, unsigned long addr, + int level, u64 val) { } +#endif /* CONFIG_PTDUMP_CORE */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 47ec58031f11..47ff8654c5ec 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,35 +21,12 @@ #define INIT_PSTATE_EL2 \ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h) -/* - * PMR values used to mask/unmask interrupts. - * - * GIC priority masking works as follows: if an IRQ's priority is a higher value - * than the value held in PMR, that IRQ is masked. Lowering the value of PMR - * means masking more IRQs (or at least that the same IRQs remain masked). - * - * To mask interrupts, we clear the most significant bit of PMR. - * - * Some code sections either automatically switch back to PSR.I or explicitly - * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included - * in the priority mask, it indicates that PSR.I should be set and - * interrupt disabling temporarily does not rely on IRQ priorities. - */ -#define GIC_PRIO_IRQON 0xe0 -#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) -#define __GIC_PRIO_IRQOFF_NS 0xa0 -#define GIC_PRIO_PSR_I_SET (1 << 4) - -#define GIC_PRIO_IRQOFF \ - ({ \ - extern struct static_key_false gic_nonsecure_priorities;\ - u8 __prio = __GIC_PRIO_IRQOFF; \ - \ - if (static_branch_unlikely(&gic_nonsecure_priorities)) \ - __prio = __GIC_PRIO_IRQOFF_NS; \ - \ - __prio; \ - }) +#include <linux/irqchip/arm-gic-v3-prio.h> + +#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED +#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ + +#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) @@ -121,6 +98,8 @@ #include <linux/bug.h> #include <linux/types.h> +#include <asm/stacktrace/frame.h> + /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -172,8 +151,7 @@ static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) /* * This struct defines the way the registers are stored on the stack during an - * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for - * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs. + * exception. struct user_pt_regs must form a prefix of struct pt_regs. */ struct pt_regs { union { @@ -186,23 +164,20 @@ struct pt_regs { }; }; u64 orig_x0; -#ifdef __AARCH64EB__ - u32 unused2; - s32 syscallno; -#else s32 syscallno; - u32 unused2; -#endif + u32 pmr; + u64 sdei_ttbr1; - /* Only valid when ARM64_HAS_GIC_PRIO_MASKING is enabled. */ - u64 pmr_save; - u64 stackframe[2]; + struct frame_record_meta stackframe; /* Only valid for some EL1 exceptions. */ u64 lockdep_hardirqs; u64 exit_rcu; }; +/* For correct stack alignment, pt_regs has to be a multiple of 16 bytes. */ +static_assert(IS_ALIGNED(sizeof(struct pt_regs), 16)); + static inline bool in_syscall(struct pt_regs const *regs) { return regs->syscallno != NO_SYSCALL; @@ -236,7 +211,7 @@ static inline void forget_syscall(struct pt_regs *regs) #define irqs_priority_unmasked(regs) \ (system_uses_irq_prio_masking() ? \ - (regs)->pmr_save == GIC_PRIO_IRQON : \ + (regs)->pmr == GIC_PRIO_IRQON : \ true) #define interrupts_enabled(regs) \ diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h new file mode 100644 index 000000000000..b42aeac05340 --- /dev/null +++ b/arch/arm64/include/asm/rsi.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 ARM Ltd. + */ + +#ifndef __ASM_RSI_H_ +#define __ASM_RSI_H_ + +#include <linux/errno.h> +#include <linux/jump_label.h> +#include <asm/rsi_cmds.h> + +#define RSI_PDEV_NAME "arm-cca-dev" + +DECLARE_STATIC_KEY_FALSE(rsi_present); + +void __init arm64_rsi_init(void); + +bool __arm64_is_protected_mmio(phys_addr_t base, size_t size); + +static inline bool is_realm_world(void) +{ + return static_branch_unlikely(&rsi_present); +} + +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end, + enum ripas state, unsigned long flags) +{ + unsigned long ret; + phys_addr_t top; + + while (start != end) { + ret = rsi_set_addr_range_state(start, end, state, flags, &top); + if (ret || top < start || top > end) + return -EINVAL; + start = top; + } + + return 0; +} + +/* + * Convert the specified range to RAM. Do not use this if you rely on the + * contents of a page that may already be in RAM state. + */ +static inline int rsi_set_memory_range_protected(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_CHANGE_DESTROYED); +} + +/* + * Convert the specified range to RAM. Do not convert any pages that may have + * been DESTROYED, without our permission. + */ +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_NO_CHANGE_DESTROYED); +} + +static inline int rsi_set_memory_range_shared(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY, + RSI_CHANGE_DESTROYED); +} +#endif /* __ASM_RSI_H_ */ diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h new file mode 100644 index 000000000000..e6a211001bd3 --- /dev/null +++ b/arch/arm64/include/asm/rsi_cmds.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RSI_CMDS_H +#define __ASM_RSI_CMDS_H + +#include <linux/arm-smccc.h> + +#include <asm/rsi_smc.h> + +#define RSI_GRANULE_SHIFT 12 +#define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT) + +enum ripas { + RSI_RIPAS_EMPTY = 0, + RSI_RIPAS_RAM = 1, + RSI_RIPAS_DESTROYED = 2, + RSI_RIPAS_DEV = 3, +}; + +static inline unsigned long rsi_request_version(unsigned long req, + unsigned long *out_lower, + unsigned long *out_higher) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_ABI_VERSION, req, 0, 0, 0, 0, 0, 0, &res); + + if (out_lower) + *out_lower = res.a1; + if (out_higher) + *out_higher = res.a2; + + return res.a0; +} + +static inline unsigned long rsi_get_realm_config(struct realm_config *cfg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_REALM_CONFIG, virt_to_phys(cfg), + 0, 0, 0, 0, 0, 0, &res); + return res.a0; +} + +static inline unsigned long rsi_ipa_state_get(phys_addr_t start, + phys_addr_t end, + enum ripas *state, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_GET, + start, end, 0, 0, 0, 0, 0, + &res); + + if (res.a0 == RSI_SUCCESS) { + if (top) + *top = res.a1; + if (state) + *state = res.a2; + } + + return res.a0; +} + +static inline long rsi_set_addr_range_state(phys_addr_t start, + phys_addr_t end, + enum ripas state, + unsigned long flags, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_SET, start, end, state, + flags, 0, 0, 0, &res); + + if (top) + *top = res.a1; + + if (res.a2 != RSI_ACCEPT) + return -EPERM; + + return res.a0; +} + +/** + * rsi_attestation_token_init - Initialise the operation to retrieve an + * attestation token. + * + * @challenge: The challenge data to be used in the attestation token + * generation. + * @size: Size of the challenge data in bytes. + * + * Initialises the attestation token generation and returns an upper bound + * on the attestation token size that can be used to allocate an adequate + * buffer. The caller is expected to subsequently call + * rsi_attestation_token_continue() to retrieve the attestation token data on + * the same CPU. + * + * Returns: + * On success, returns the upper limit of the attestation report size. + * Otherwise, -EINVAL + */ +static inline long +rsi_attestation_token_init(const u8 *challenge, unsigned long size) +{ + struct arm_smccc_1_2_regs regs = { 0 }; + + /* The challenge must be at least 32bytes and at most 64bytes */ + if (!challenge || size < 32 || size > 64) + return -EINVAL; + + regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT; + memcpy(®s.a1, challenge, size); + arm_smccc_1_2_smc(®s, ®s); + + if (regs.a0 == RSI_SUCCESS) + return regs.a1; + + return -EINVAL; +} + +/** + * rsi_attestation_token_continue - Continue the operation to retrieve an + * attestation token. + * + * @granule: {I}PA of the Granule to which the token will be written. + * @offset: Offset within Granule to start of buffer in bytes. + * @size: The size of the buffer. + * @len: The number of bytes written to the buffer. + * + * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller + * is expected to call rsi_attestation_token_init() before calling this + * function to retrieve the attestation token. + * + * Return: + * * %RSI_SUCCESS - Attestation token retrieved successfully. + * * %RSI_INCOMPLETE - Token generation is not complete. + * * %RSI_ERROR_INPUT - A parameter was not valid. + * * %RSI_ERROR_STATE - Attestation not in progress. + */ +static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule, + unsigned long offset, + unsigned long size, + unsigned long *len) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE, + granule, offset, size, 0, &res); + + if (len) + *len = res.a1; + return res.a0; +} + +#endif /* __ASM_RSI_CMDS_H */ diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h new file mode 100644 index 000000000000..6cb070eca9e9 --- /dev/null +++ b/arch/arm64/include/asm/rsi_smc.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RSI_SMC_H_ +#define __ASM_RSI_SMC_H_ + +#include <linux/arm-smccc.h> + +/* + * This file describes the Realm Services Interface (RSI) Application Binary + * Interface (ABI) for SMC calls made from within the Realm to the RMM and + * serviced by the RMM. + */ + +/* + * The major version number of the RSI implementation. This is increased when + * the binary format or semantics of the SMC calls change. + */ +#define RSI_ABI_VERSION_MAJOR UL(1) + +/* + * The minor version number of the RSI implementation. This is increased when + * a bug is fixed, or a feature is added without breaking binary compatibility. + */ +#define RSI_ABI_VERSION_MINOR UL(0) + +#define RSI_ABI_VERSION ((RSI_ABI_VERSION_MAJOR << 16) | \ + RSI_ABI_VERSION_MINOR) + +#define RSI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16) +#define RSI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF) + +#define RSI_SUCCESS UL(0) +#define RSI_ERROR_INPUT UL(1) +#define RSI_ERROR_STATE UL(2) +#define RSI_INCOMPLETE UL(3) +#define RSI_ERROR_UNKNOWN UL(4) + +#define SMC_RSI_FID(n) ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD, \ + n) + +/* + * Returns RSI version. + * + * arg1 == Requested interface revision + * ret0 == Status / error + * ret1 == Lower implemented interface revision + * ret2 == Higher implemented interface revision + */ +#define SMC_RSI_ABI_VERSION SMC_RSI_FID(0x190) + +/* + * Read feature register. + * + * arg1 == Feature register index + * ret0 == Status / error + * ret1 == Feature register value + */ +#define SMC_RSI_FEATURES SMC_RSI_FID(0x191) + +/* + * Read measurement for the current Realm. + * + * arg1 == Index, which measurements slot to read + * ret0 == Status / error + * ret1 == Measurement value, bytes: 0 - 7 + * ret2 == Measurement value, bytes: 8 - 15 + * ret3 == Measurement value, bytes: 16 - 23 + * ret4 == Measurement value, bytes: 24 - 31 + * ret5 == Measurement value, bytes: 32 - 39 + * ret6 == Measurement value, bytes: 40 - 47 + * ret7 == Measurement value, bytes: 48 - 55 + * ret8 == Measurement value, bytes: 56 - 63 + */ +#define SMC_RSI_MEASUREMENT_READ SMC_RSI_FID(0x192) + +/* + * Extend Realm Extensible Measurement (REM) value. + * + * arg1 == Index, which measurements slot to extend + * arg2 == Size of realm measurement in bytes, max 64 bytes + * arg3 == Measurement value, bytes: 0 - 7 + * arg4 == Measurement value, bytes: 8 - 15 + * arg5 == Measurement value, bytes: 16 - 23 + * arg6 == Measurement value, bytes: 24 - 31 + * arg7 == Measurement value, bytes: 32 - 39 + * arg8 == Measurement value, bytes: 40 - 47 + * arg9 == Measurement value, bytes: 48 - 55 + * arg10 == Measurement value, bytes: 56 - 63 + * ret0 == Status / error + */ +#define SMC_RSI_MEASUREMENT_EXTEND SMC_RSI_FID(0x193) + +/* + * Initialize the operation to retrieve an attestation token. + * + * arg1 == Challenge value, bytes: 0 - 7 + * arg2 == Challenge value, bytes: 8 - 15 + * arg3 == Challenge value, bytes: 16 - 23 + * arg4 == Challenge value, bytes: 24 - 31 + * arg5 == Challenge value, bytes: 32 - 39 + * arg6 == Challenge value, bytes: 40 - 47 + * arg7 == Challenge value, bytes: 48 - 55 + * arg8 == Challenge value, bytes: 56 - 63 + * ret0 == Status / error + * ret1 == Upper bound of token size in bytes + */ +#define SMC_RSI_ATTESTATION_TOKEN_INIT SMC_RSI_FID(0x194) + +/* + * Continue the operation to retrieve an attestation token. + * + * arg1 == The IPA of token buffer + * arg2 == Offset within the granule of the token buffer + * arg3 == Size of the granule buffer + * ret0 == Status / error + * ret1 == Length of token bytes copied to the granule buffer + */ +#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195) + +#ifndef __ASSEMBLY__ + +struct realm_config { + union { + struct { + unsigned long ipa_bits; /* Width of IPA in bits */ + unsigned long hash_algo; /* Hash algorithm */ + }; + u8 pad[0x200]; + }; + union { + u8 rpv[64]; /* Realm Personalization Value */ + u8 pad2[0xe00]; + }; + /* + * The RMM requires the configuration structure to be aligned to a 4k + * boundary, ensure this happens by aligning this structure. + */ +} __aligned(0x1000); + +#endif /* __ASSEMBLY__ */ + +/* + * Read configuration for the current Realm. + * + * arg1 == struct realm_config addr + * ret0 == Status / error + */ +#define SMC_RSI_REALM_CONFIG SMC_RSI_FID(0x196) + +/* + * Request RIPAS of a target IPA range to be changed to a specified value. + * + * arg1 == Base IPA address of target region + * arg2 == Top of the region + * arg3 == RIPAS value + * arg4 == flags + * ret0 == Status / error + * ret1 == Top of modified IPA range + * ret2 == Whether the Host accepted or rejected the request + */ +#define SMC_RSI_IPA_STATE_SET SMC_RSI_FID(0x197) + +#define RSI_NO_CHANGE_DESTROYED UL(0) +#define RSI_CHANGE_DESTROYED UL(1) + +#define RSI_ACCEPT UL(0) +#define RSI_REJECT UL(1) + +/* + * Get RIPAS of a target IPA range. + * + * arg1 == Base IPA of target region + * arg2 == End of target IPA region + * ret0 == Status / error + * ret1 == Top of IPA region which has the reported RIPAS value + * ret2 == RIPAS value + */ +#define SMC_RSI_IPA_STATE_GET SMC_RSI_FID(0x198) + +/* + * Make a Host call. + * + * arg1 == IPA of host call structure + * ret0 == Status / error + */ +#define SMC_RSI_HOST_CALL SMC_RSI_FID(0x199) + +#endif /* __ASM_RSI_SMC_H_ */ diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h new file mode 100644 index 000000000000..be5915669d23 --- /dev/null +++ b/arch/arm64/include/asm/runtime-const.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#include <asm/cacheflush.h> + +/* Sigh. You can still run arm64 in BE mode */ +#include <asm/byteorder.h> + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("1:\t" \ + "movz %0, #0xcdef\n\t" \ + "movk %0, #0x89ab, lsl #16\n\t" \ + "movk %0, #0x4567, lsl #32\n\t" \ + "movk %0, #0x0123, lsl #48\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret)); \ + __ret; }) + +#define runtime_const_shift_right_32(val, sym) ({ \ + unsigned long __ret; \ + asm_inline("1:\t" \ + "lsr %w0,%w1,#12\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"r" (0u+(val))); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */ +static inline void __runtime_fixup_16(__le32 *p, unsigned int val) +{ + u32 insn = le32_to_cpu(*p); + insn &= 0xffe0001f; + insn |= (val & 0xffff) << 5; + *p = cpu_to_le32(insn); +} + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + unsigned long va = (unsigned long)where; + caches_clean_inval_pou(va, va + 4*insns); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + __runtime_fixup_16(p, val); + __runtime_fixup_16(p+1, val >> 16); + __runtime_fixup_16(p+2, val >> 32); + __runtime_fixup_16(p+3, val >> 48); + __runtime_fixup_caches(where, 4); +} + +/* Immediate value is 6 bits starting at bit #16 */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + u32 insn = le32_to_cpu(*p); + insn &= 0xffc0ffff; + insn |= (val & 63) << 16; + *p = cpu_to_le32(insn); + __runtime_fixup_caches(where, 1); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 3fdae5fe3142..a76f9b387a26 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -33,37 +33,11 @@ #include <asm/cpufeature.h> #ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS -static inline bool should_patch_pac_into_scs(void) -{ - u64 reg; - - /* - * We only enable the shadow call stack dynamically if we are running - * on a system that does not implement PAC or BTI. PAC and SCS provide - * roughly the same level of protection, and BTI relies on the PACIASP - * instructions serving as landing pads, preventing us from patching - * those instructions into something else. - */ - reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1); - if (SYS_FIELD_GET(ID_AA64ISAR1_EL1, APA, reg) | - SYS_FIELD_GET(ID_AA64ISAR1_EL1, API, reg)) - return false; - - reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); - if (SYS_FIELD_GET(ID_AA64ISAR2_EL1, APA3, reg)) - return false; - - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { - reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1); - if (reg & (0xf << ID_AA64PFR1_EL1_BT_SHIFT)) - return false; - } - return true; -} - static inline void dynamic_scs_init(void) { - if (should_patch_pac_into_scs()) { + extern bool __pi_dynamic_scs_is_enabled; + + if (__pi_dynamic_scs_is_enabled) { pr_info("Enabling dynamic shadow call stack\n"); static_branch_enable(&dynamic_scs_enabled); } @@ -72,8 +46,14 @@ static inline void dynamic_scs_init(void) static inline void dynamic_scs_init(void) {} #endif -int scs_patch(const u8 eh_frame[], int size); -asmlinkage void scs_patch_vmlinux(void); +enum { + EDYNSCS_INVALID_CIE_HEADER = 1, + EDYNSCS_INVALID_CIE_SDATA_SIZE = 2, + EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE = 3, + EDYNSCS_INVALID_CFA_OPCODE = 4, +}; + +int __pi_scs_patch(const u8 eh_frame[], int size); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h index 30256233788b..bf6bf40bc5ab 100644 --- a/arch/arm64/include/asm/seccomp.h +++ b/arch/arm64/include/asm/seccomp.h @@ -8,13 +8,13 @@ #ifndef _ASM_SECCOMP_H #define _ASM_SECCOMP_H -#include <asm/unistd.h> +#include <asm/unistd_compat_32.h> #ifdef CONFIG_COMPAT -#define __NR_seccomp_read_32 __NR_compat_read -#define __NR_seccomp_write_32 __NR_compat_write -#define __NR_seccomp_exit_32 __NR_compat_exit -#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#define __NR_seccomp_read_32 __NR_compat32_read +#define __NR_seccomp_write_32 __NR_compat32_write +#define __NR_seccomp_exit_32 __NR_compat32_exit +#define __NR_seccomp_sigreturn_32 __NR_compat32_rt_sigreturn #endif /* CONFIG_COMPAT */ #include <asm-generic/seccomp.h> @@ -24,7 +24,7 @@ #define SECCOMP_ARCH_NATIVE_NAME "aarch64" #ifdef CONFIG_COMPAT # define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM -# define SECCOMP_ARCH_COMPAT_NR __NR_compat_syscalls +# define SECCOMP_ARCH_COMPAT_NR __NR_compat32_syscalls # define SECCOMP_ARCH_COMPAT_NAME "arm" #endif diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h index 0f740b781187..90f61b17275e 100644 --- a/arch/arm64/include/asm/set_memory.h +++ b/arch/arm64/include/asm/set_memory.h @@ -3,6 +3,7 @@ #ifndef _ASM_ARM64_SET_MEMORY_H #define _ASM_ARM64_SET_MEMORY_H +#include <asm/mem_encrypt.h> #include <asm-generic/set_memory.h> bool can_set_direct_map(void); @@ -12,6 +13,10 @@ int set_memory_valid(unsigned long addr, int numpages, int enable); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); bool kernel_page_present(struct page *page); +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + #endif /* _ASM_ARM64_SET_MEMORY_H */ diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index 2e4d7da74fb8..ba269a7a3201 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -7,9 +7,6 @@ #include <uapi/asm/setup.h> -void *get_early_fdt_ptr(void); -void early_fdt_map(u64 dt_phys); - /* * These two variables are used in the head.S file. */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index efb13112b408..2510eec026f7 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -25,22 +25,11 @@ #ifndef __ASSEMBLY__ -#include <asm/percpu.h> - #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); - -/* - * We don't use this_cpu_read(cpu_number) as that has implicit writes to - * preempt_count, and associated (compiler) barriers, that we'd like to avoid - * the expense of. If we're preemptible, the value can be stale at use anyway. - * And we can't use this_cpu_ptr() either, as that winds up recursing back - * here under CONFIG_DEBUG_PREEMPT=y. - */ -#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) +#define raw_smp_processor_id() (current_thread_info()->cpu) /* * Logical CPU mapping. diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 8a8acc220371..84783efdc9d1 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -5,7 +5,10 @@ #ifndef __ASM_SPARSEMEM_H #define __ASM_SPARSEMEM_H -#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS +#include <asm/pgtable-prot.h> + +#define MAX_PHYSMEM_BITS PHYS_MASK_SHIFT +#define MAX_POSSIBLE_PHYSMEM_BITS (52) /* * Section size must be at least 512MB for 64K base diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 11ab1c077697..7cde3d8bd0ad 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -6,7 +6,7 @@ #define __ASM_SPINLOCK_TYPES_H #if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H) -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include <asm-generic/qspinlock_types.h> diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index f63dc654e545..821a8fdd31af 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -60,13 +60,27 @@ static inline void unwind_init_common(struct unwind_state *state) state->stack = stackinfo_get_unknown(); } -static struct stack_info *unwind_find_next_stack(const struct unwind_state *state, - unsigned long sp, - unsigned long size) +/** + * unwind_find_stack() - Find the accessible stack which entirely contains an + * object. + * + * @state: the current unwind state. + * @sp: the base address of the object. + * @size: the size of the object. + * + * Return: a pointer to the relevant stack_info if found; NULL otherwise. + */ +static struct stack_info *unwind_find_stack(struct unwind_state *state, + unsigned long sp, + unsigned long size) { - for (int i = 0; i < state->nr_stacks; i++) { - struct stack_info *info = &state->stacks[i]; + struct stack_info *info = &state->stack; + if (stackinfo_on_stack(info, sp, size)) + return info; + + for (int i = 0; i < state->nr_stacks; i++) { + info = &state->stacks[i]; if (stackinfo_on_stack(info, sp, size)) return info; } @@ -75,36 +89,31 @@ static struct stack_info *unwind_find_next_stack(const struct unwind_state *stat } /** - * unwind_consume_stack() - Check if an object is on an accessible stack, - * updating stack boundaries so that future unwind steps cannot consume this - * object again. + * unwind_consume_stack() - Update stack boundaries so that future unwind steps + * cannot consume this object again. * * @state: the current unwind state. + * @info: the stack_info of the stack containing the object. * @sp: the base address of the object. * @size: the size of the object. * * Return: 0 upon success, an error code otherwise. */ -static inline int unwind_consume_stack(struct unwind_state *state, - unsigned long sp, - unsigned long size) +static inline void unwind_consume_stack(struct unwind_state *state, + struct stack_info *info, + unsigned long sp, + unsigned long size) { - struct stack_info *next; - - if (stackinfo_on_stack(&state->stack, sp, size)) - goto found; - - next = unwind_find_next_stack(state, sp, size); - if (!next) - return -EINVAL; + struct stack_info tmp; /* * Stack transitions are strictly one-way, and once we've * transitioned from one stack to another, it's never valid to * unwind back to the old stack. * - * Remove the current stack from the list of stacks so that it cannot - * be found on a subsequent transition. + * Destroy the old stack info so that it cannot be found upon a + * subsequent transition. If the stack has not changed, we'll + * immediately restore the current stack info. * * Note that stacks can nest in several valid orders, e.g. * @@ -115,16 +124,15 @@ static inline int unwind_consume_stack(struct unwind_state *state, * ... so we do not check the specific order of stack * transitions. */ - state->stack = *next; - *next = stackinfo_get_unknown(); + tmp = *info; + *info = stackinfo_get_unknown(); + state->stack = tmp; -found: /* * Future unwind steps can only consume stack above this frame record. * Update the current stack to start immediately above it. */ state->stack.low = sp + size; - return 0; } /** @@ -137,21 +145,25 @@ found: static inline int unwind_next_frame_record(struct unwind_state *state) { + struct stack_info *info; + struct frame_record *record; unsigned long fp = state->fp; - int err; if (fp & 0x7) return -EINVAL; - err = unwind_consume_stack(state, fp, 16); - if (err) - return err; + info = unwind_find_stack(state, fp, sizeof(*record)); + if (!info) + return -EINVAL; + + unwind_consume_stack(state, info, fp, sizeof(*record)); /* * Record this frame record's values. */ - state->fp = READ_ONCE(*(unsigned long *)(fp)); - state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); + record = (struct frame_record *)fp; + state->fp = READ_ONCE(record->fp); + state->pc = READ_ONCE(record->lr); return 0; } diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h new file mode 100644 index 000000000000..0ee0f6ba0fd8 --- /dev/null +++ b/arch/arm64/include/asm/stacktrace/frame.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_STACKTRACE_FRAME_H +#define __ASM_STACKTRACE_FRAME_H + +/* + * - FRAME_META_TYPE_NONE + * + * This value is reserved. + * + * - FRAME_META_TYPE_FINAL + * + * The record is the last entry on the stack. + * Unwinding should terminate successfully. + * + * - FRAME_META_TYPE_PT_REGS + * + * The record is embedded within a struct pt_regs, recording the registers at + * an arbitrary point in time. + * Unwinding should consume pt_regs::pc, followed by pt_regs::lr. + * + * Note: all other values are reserved and should result in unwinding + * terminating with an error. + */ +#define FRAME_META_TYPE_NONE 0 +#define FRAME_META_TYPE_FINAL 1 +#define FRAME_META_TYPE_PT_REGS 2 + +#ifndef __ASSEMBLY__ +/* + * A standard AAPCS64 frame record. + */ +struct frame_record { + u64 fp; + u64 lr; +}; + +/* + * A metadata frame record indicating a special unwind. + * The record::{fp,lr} fields must be zero to indicate the presence of + * metadata. + */ +struct frame_record_meta { + struct frame_record record; + u64 type; +}; +#endif /* __ASSEMBLY */ + +#endif /* __ASM_STACKTRACE_FRAME_H */ diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h index 44759281d0d4..171f9edef49f 100644 --- a/arch/arm64/include/asm/stacktrace/nvhe.h +++ b/arch/arm64/include/asm/stacktrace/nvhe.h @@ -47,7 +47,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state, DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info); -DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); +DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base); void kvm_nvhe_dump_backtrace(unsigned long hyp_offset); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c3b19b376c86..05ea5223d2d5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -109,6 +109,9 @@ #define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) #define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x)) +/* Register-based PAN access, for save/restore purposes */ +#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3) + #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) @@ -280,8 +283,6 @@ #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) -#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) - #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) #define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) @@ -325,7 +326,25 @@ #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) #define SYS_PAR_EL1_F BIT(0) +/* When PAR_EL1.F == 1 */ #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define SYS_PAR_EL1_PTW BIT(8) +#define SYS_PAR_EL1_S BIT(9) +#define SYS_PAR_EL1_AssuredOnly BIT(12) +#define SYS_PAR_EL1_TopLevel BIT(13) +#define SYS_PAR_EL1_Overlay BIT(14) +#define SYS_PAR_EL1_DirtyBit BIT(15) +#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48) +#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16)) +#define SYS_PAR_EL1_RES1 BIT(11) +/* When PAR_EL1.F == 0 */ +#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7) +#define SYS_PAR_EL1_NS BIT(9) +#define SYS_PAR_EL1_F0_IMPDEF BIT(10) +#define SYS_PAR_EL1_NSE BIT(11) +#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12) +#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56) +#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52)) /*** Statistical Profiling Extension ***/ #define PMSEVFR_EL1_RES0_IMP \ @@ -403,7 +422,6 @@ #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) #define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) #define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) -#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) #define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) #define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) #define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) @@ -457,6 +475,7 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) +#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) @@ -464,14 +483,17 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0) #define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) #define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) #define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) +#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) #define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) +#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) @@ -499,7 +521,6 @@ #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) -#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) @@ -522,18 +543,6 @@ #define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) #define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) -#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0) -#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1) -#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0) -#define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x) -#define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0) -#define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1) -#define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2) -#define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3) -#define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4) -#define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5) -#define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6) -#define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7) #define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0) #define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1) @@ -652,8 +661,26 @@ #define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5) #define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6) #define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) +#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2) /* TLBI instructions */ +#define TLBI_Op0 1 + +#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */ +#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */ + +#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */ +#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/ + +#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */ +#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */ +#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */ +#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */ +#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */ +#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */ +#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */ +#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */ + #define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) #define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1) #define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2) @@ -872,10 +899,6 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64pfr0 */ -#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT @@ -961,15 +984,6 @@ /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) -#define TRFCR_ELx_TS_SHIFT 5 -#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_EL2_CX BIT(3) -#define TRFCR_ELx_ExTRE BIT(1) -#define TRFCR_ELx_E0TRE BIT(0) - /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ #define ICH_MISR_EOI (1 << 0) @@ -1036,18 +1050,18 @@ * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). */ -#define PIE_NONE_O 0x0 -#define PIE_R_O 0x1 -#define PIE_X_O 0x2 -#define PIE_RX_O 0x3 -#define PIE_RW_O 0x5 -#define PIE_RWnX_O 0x6 -#define PIE_RWX_O 0x7 -#define PIE_R 0x8 -#define PIE_GCS 0x9 -#define PIE_RX 0xa -#define PIE_RW 0xc -#define PIE_RWX 0xe +#define PIE_NONE_O UL(0x0) +#define PIE_R_O UL(0x1) +#define PIE_X_O UL(0x2) +#define PIE_RX_O UL(0x3) +#define PIE_RW_O UL(0x5) +#define PIE_RWnX_O UL(0x6) +#define PIE_RWX_O UL(0x7) +#define PIE_R UL(0x8) +#define PIE_GCS UL(0x9) +#define PIE_RX UL(0xa) +#define PIE_RW UL(0xc) +#define PIE_RWX UL(0xe) #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) @@ -1064,6 +1078,29 @@ #define POE_RXW UL(0x7) #define POE_MASK UL(0xf) +/* Initial value for Permission Overlay Extension for EL0 */ +#define POR_EL0_INIT POE_RXW + +/* + * Definitions for Guarded Control Stack + */ + +#define GCS_CAP_ADDR_MASK GENMASK(63, 12) +#define GCS_CAP_ADDR_SHIFT 12 +#define GCS_CAP_ADDR_WIDTH 52 +#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x) + +#define GCS_CAP_TOKEN_MASK GENMASK(11, 0) +#define GCS_CAP_TOKEN_SHIFT 0 +#define GCS_CAP_TOKEN_WIDTH 12 +#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x) + +#define GCS_CAP_VALID_TOKEN 0x1 +#define GCS_CAP_IN_PROGRESS_TOKEN 0x5 + +#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \ + GCS_CAP_VALID_TOKEN) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ @@ -1181,6 +1218,8 @@ par; \ }) +#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val + #define SYS_FIELD_GET(reg, field, val) \ FIELD_GET(reg##_##field##_MASK, val) @@ -1188,7 +1227,8 @@ FIELD_PREP(reg##_##field##_MASK, val) #define SYS_FIELD_PREP_ENUM(reg, field, val) \ - FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) + FIELD_PREP(reg##_##field##_MASK, \ + SYS_FIELD_VALUE(reg, field, val)) #endif diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/text-patching.h index 68908b82b168..587bdb91ab7a 100644 --- a/arch/arm64/include/asm/patching.h +++ b/arch/arm64/include/asm/text-patching.h @@ -8,6 +8,8 @@ int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); int aarch64_insn_write_literal_u64(void *addr, u64 val); +void *aarch64_insn_set(void *dst, u32 insn, size_t len); +void *aarch64_insn_copy(void *dst, void *src, size_t len); int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e72a3bf9e563..1114c1c3300a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -81,6 +81,7 @@ void arch_setup_new_exec(void); #define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */ +#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -97,6 +98,7 @@ void arch_setup_new_exec(void); #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 0150deb332af..8d762607285c 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -9,12 +9,7 @@ #define __ASM_TLB_H #include <linux/pagemap.h> -#include <linux/swap.h> -static inline void __tlb_remove_table(void *_table) -{ - free_page_and_swap_cache((struct page *)_table); -} #define tlb_flush tlb_flush static void tlb_flush(struct mmu_gather *tlb); @@ -82,7 +77,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, { struct ptdesc *ptdesc = page_ptdesc(pte); - pagetable_pte_dtor(ptdesc); tlb_remove_ptdesc(tlb, ptdesc); } @@ -92,7 +86,6 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, { struct ptdesc *ptdesc = virt_to_ptdesc(pmdp); - pagetable_pmd_dtor(ptdesc); tlb_remove_ptdesc(tlb, ptdesc); } #endif @@ -103,7 +96,22 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, { struct ptdesc *ptdesc = virt_to_ptdesc(pudp); - pagetable_pud_dtor(ptdesc); + if (!pgtable_l4_enabled()) + return; + + tlb_remove_ptdesc(tlb, ptdesc); +} +#endif + +#if CONFIG_PGTABLE_LEVELS > 4 +static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4dp, + unsigned long addr) +{ + struct ptdesc *ptdesc = virt_to_ptdesc(p4dp); + + if (!pgtable_l5_enabled()) + return; + tlb_remove_ptdesc(tlb, ptdesc); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 1deb5d789c2e..8104aee4f9a0 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -142,17 +142,24 @@ static inline unsigned long get_trans_granule(void) * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (baddr); \ - unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= __ttl << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -161,12 +168,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -379,41 +392,39 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * 3. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. We save this for last to * ensure 64KB start alignment is maintained for the LPA2 case. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ do { \ + typeof(start) __flush_start = start; \ + typeof(pages) __flush_pages = pages; \ int num = 0; \ int scale = 3; \ int shift = lpa2 ? 16 : PAGE_SHIFT; \ unsigned long addr; \ \ - while (pages > 0) { \ + while (__flush_pages > 0) { \ if (!system_supports_tlb_range() || \ - pages == 1 || \ - (lpa2 && start != ALIGN(start, SZ_64K))) { \ - addr = __TLBI_VADDR(start, asid); \ + __flush_pages == 1 || \ + (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \ + addr = __TLBI_VADDR(__flush_start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ __tlbi_user_level(op, addr, tlb_level); \ - start += stride; \ - pages -= stride >> PAGE_SHIFT; \ + __flush_start += stride; \ + __flush_pages -= stride >> PAGE_SHIFT; \ continue; \ } \ \ - num = __TLBI_RANGE_NUM(pages, scale); \ + num = __TLBI_RANGE_NUM(__flush_pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \ scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ - pages -= __TLBI_RANGE_PAGES(num, scale); \ + __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\ } \ scale--; \ } \ @@ -422,7 +433,24 @@ do { \ #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); -static inline void __flush_tlb_range(struct vm_area_struct *vma, +static inline bool __flush_tlb_range_limit_excess(unsigned long start, + unsigned long end, unsigned long pages, unsigned long stride) +{ + /* + * When the system does not support TLB range based flush + * operation, (MAX_DVM_OPS - 1) pages can be handled. But + * with TLB range based operation, MAX_TLBI_RANGE_PAGES + * pages can be handled. + */ + if ((!system_supports_tlb_range() && + (end - start) >= (MAX_DVM_OPS * stride)) || + pages > MAX_TLBI_RANGE_PAGES) + return true; + + return false; +} + +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) @@ -433,15 +461,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, end = round_up(end, stride); pages = (end - start) >> PAGE_SHIFT; - /* - * When not uses TLB range ops, we can handle up to - * (MAX_DVM_OPS - 1) pages; - * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. - */ - if ((!system_supports_tlb_range() && - (end - start) >= (MAX_DVM_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { flush_tlb_mm(vma->vm_mm); return; } @@ -456,10 +476,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true, lpa2_is_enabled()); - dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); } +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long stride, bool last_level, + int tlb_level) +{ + __flush_tlb_range_nosync(vma, start, end, stride, + last_level, tlb_level); + dsb(ish); +} + static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -474,19 +503,21 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long addr; + const unsigned long stride = PAGE_SIZE; + unsigned long pages; - if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) { + start = round_down(start, stride); + end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; + + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { flush_tlb_all(); return; } - start = __TLBI_VADDR(start, 0); - end = __TLBI_VADDR(end, 0); - dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaale1is, addr); + __flush_tlb_range_op(vaale1is, start, pages, stride, 0, + TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); dsb(ish); isb(); } diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index a323b109b9c4..341174bf9106 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -5,6 +5,7 @@ #include <linux/cpumask.h> #ifdef CONFIG_NUMA +#include <asm/numa.h> struct pci_bus; int pcibus_to_node(struct pci_bus *bus); @@ -25,19 +26,15 @@ void update_freq_counters_refs(void); #define arch_scale_freq_invariant topology_scale_freq_invariant #define arch_scale_freq_ref topology_get_freq_ref -#ifdef CONFIG_ACPI_CPPC_LIB -#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc -#endif - /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale /* Enable topology flag updates */ #define arch_update_cpu_topology topology_update_cpu_topology -/* Replace task scheduler's default thermal pressure API */ -#define arch_scale_thermal_pressure topology_get_thermal_pressure -#define arch_update_thermal_pressure topology_update_thermal_pressure +/* Replace task scheduler's default HW pressure API */ +#define arch_scale_hw_pressure topology_get_hw_pressure +#define arch_update_hw_pressure topology_update_hw_pressure #include <asm-generic/topology.h> diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index eefe766d6161..d780d1bd2eac 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); +void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey); void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 14be5000c5a0..5b91803201ef 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -184,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err, type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_mem_asm(load, reg, x, addr, label, type) \ + asm_goto_output( \ + "1: " load " " reg "0, [%1]\n" \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : "=r" (x) \ + : "r" (addr) : : label) +#else +#define __get_mem_asm(load, reg, x, addr, label, type) do { \ + int __gma_err = 0; \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ - : "+r" (err), "=r" (x) \ - : "r" (addr)) + : "+r" (__gma_err), "=r" (x) \ + : "r" (addr)); \ + if (__gma_err) goto label; } while (0) +#endif -#define __raw_get_mem(ldr, x, ptr, err, type) \ +#define __raw_get_mem(ldr, x, ptr, label, type) \ do { \ unsigned long __gu_val; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \ break; \ case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \ break; \ case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \ break; \ case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -219,27 +230,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_get_user(x, ptr, err) \ +#define __raw_get_user(x, ptr, label) \ do { \ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ - \ - uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ - \ - (x) = __rgu_val; \ + do { \ + __label__ __rgu_failed; \ + uaccess_ttbr0_enable(); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \ + uaccess_ttbr0_disable(); \ + (x) = __rgu_val; \ + break; \ + __rgu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __get_user_error(x, ptr, err) \ do { \ + __label__ __gu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_get_user((x), __p, (err)); \ + __raw_get_user((x), __p, __gu_failed); \ } else { \ + __gu_failed: \ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \ } \ } while (0) @@ -262,40 +280,42 @@ do { \ do { \ __typeof__(dst) __gkn_dst = (dst); \ __typeof__(src) __gkn_src = (src); \ - int __gkn_err = 0; \ - \ - __mte_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err, K); \ - __mte_disable_tco_async(); \ + do { \ + __label__ __gkn_label; \ \ - if (unlikely(__gkn_err)) \ + __mte_enable_tco_async(); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_label, K); \ + __mte_disable_tco_async(); \ + break; \ + __gkn_label: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err, type) \ - asm volatile( \ - "1: " store " " reg "1, [%2]\n" \ +#define __put_mem_asm(store, reg, x, addr, label, type) \ + asm goto( \ + "1: " store " " reg "0, [%1]\n" \ "2:\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ - : "+r" (err) \ - : "rZ" (x), "r" (addr)) + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : : "rZ" (x), "r" (addr) : : label) -#define __raw_put_mem(str, x, ptr, err, type) \ +#define __raw_put_mem(str, x, ptr, label, type) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \ break; \ case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \ break; \ case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \ break; \ case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -307,25 +327,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_put_user(x, ptr, err) \ +#define __raw_put_user(x, ptr, label) \ do { \ + __label__ __rpu_failed; \ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ __typeof__(*(ptr)) __rpu_val = (x); \ __chk_user_ptr(__rpu_ptr); \ \ - uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ + do { \ + uaccess_ttbr0_enable(); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \ + uaccess_ttbr0_disable(); \ + break; \ + __rpu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __put_user_error(x, ptr, err) \ do { \ + __label__ __pu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_put_user((x), __p, (err)); \ + __raw_put_user((x), __p, __pu_failed); \ } else { \ + __pu_failed: \ (err) = -EFAULT; \ } \ } while (0) @@ -348,15 +377,18 @@ do { \ do { \ __typeof__(dst) __pkn_dst = (dst); \ __typeof__(src) __pkn_src = (src); \ - int __pkn_err = 0; \ \ - __mte_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err, K); \ - __mte_disable_tco_async(); \ - \ - if (unlikely(__pkn_err)) \ + do { \ + __label__ __pkn_err; \ + __mte_enable_tco_async(); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err, K); \ + __mte_disable_tco_async(); \ + break; \ + __pkn_err: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while(0) extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); @@ -381,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ }) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr,len))) + return 0; + uaccess_ttbr0_enable(); + return 1; +} +#define user_access_begin(a,b) user_access_begin(a,b) +#define user_access_end() uaccess_ttbr0_disable() +#define unsafe_put_user(x, ptr, label) \ + __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) +#define unsafe_get_user(x, ptr, label) \ + __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) + +/* + * KCSAN uses these to save and restore ttbr state. + * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so + * they are no-ops. + */ +static inline unsigned long user_access_save(void) { return 0; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER @@ -425,4 +502,44 @@ static inline size_t probe_subpage_writeable(const char __user *uaddr, #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ +#ifdef CONFIG_ARM64_GCS + +static inline int gcssttr(unsigned long __user *addr, unsigned long val) +{ + register unsigned long __user *_addr __asm__ ("x0") = addr; + register unsigned long _val __asm__ ("x1") = val; + int err = 0; + + /* GCSSTTR x1, x0 */ + asm volatile( + "1: .inst 0xd91f1c01\n" + "2: \n" + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) + : "+r" (err) + : "rZ" (_val), "r" (_addr) + : "memory"); + + return err; +} + +static inline void put_user_gcs(unsigned long val, unsigned long __user *addr, + int *err) +{ + int ret; + + if (!access_ok((char __user *)addr, sizeof(u64))) { + *err = -EFAULT; + return; + } + + uaccess_ttbr0_enable(); + ret = gcssttr(addr, val); + if (ret != 0) + *err = ret; + uaccess_ttbr0_disable(); +} + + +#endif /* CONFIG_ARM64_GCS */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 491b2b9bd553..80618c9bbcd8 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -17,35 +17,17 @@ #define __ARCH_WANT_SYS_VFORK /* - * Compat syscall numbers used by the AArch64 kernel. - */ -#define __NR_compat_restart_syscall 0 -#define __NR_compat_exit 1 -#define __NR_compat_read 3 -#define __NR_compat_write 4 -#define __NR_compat_gettimeofday 78 -#define __NR_compat_sigreturn 119 -#define __NR_compat_rt_sigreturn 173 -#define __NR_compat_clock_gettime 263 -#define __NR_compat_clock_getres 264 -#define __NR_compat_clock_gettime64 403 -#define __NR_compat_clock_getres_time64 406 - -/* * The following SVCs are ARM private. */ #define __ARM_NR_COMPAT_BASE 0x0f0000 #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) - -#define __NR_compat_syscalls 462 #endif #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_NEW_STAT -#ifndef __COMPAT_SYSCALL_NR -#include <uapi/asm/unistd.h> -#endif +#include <asm/unistd_64.h> #define NR_syscalls (__NR_syscalls) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7118282d1c79..e0b1a0b57f75 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -1,936 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AArch32 (compat) system call definitions. - * - * Copyright (C) 2001-2005 Russell King - * Copyright (C) 2012 ARM Ltd. - */ +#ifndef _UAPI__ASM_ARM_UNISTD_H +#define _UAPI__ASM_ARM_UNISTD_H -#ifndef __SYSCALL -#define __SYSCALL(x, y) -#endif +#include <asm/unistd_32.h> -#define __NR_restart_syscall 0 -__SYSCALL(__NR_restart_syscall, sys_restart_syscall) -#define __NR_exit 1 -__SYSCALL(__NR_exit, sys_exit) -#define __NR_fork 2 -__SYSCALL(__NR_fork, sys_fork) -#define __NR_read 3 -__SYSCALL(__NR_read, sys_read) -#define __NR_write 4 -__SYSCALL(__NR_write, sys_write) -#define __NR_open 5 -__SYSCALL(__NR_open, compat_sys_open) -#define __NR_close 6 -__SYSCALL(__NR_close, sys_close) - /* 7 was sys_waitpid */ -__SYSCALL(7, sys_ni_syscall) -#define __NR_creat 8 -__SYSCALL(__NR_creat, sys_creat) -#define __NR_link 9 -__SYSCALL(__NR_link, sys_link) -#define __NR_unlink 10 -__SYSCALL(__NR_unlink, sys_unlink) -#define __NR_execve 11 -__SYSCALL(__NR_execve, compat_sys_execve) -#define __NR_chdir 12 -__SYSCALL(__NR_chdir, sys_chdir) - /* 13 was sys_time */ -__SYSCALL(13, sys_ni_syscall) -#define __NR_mknod 14 -__SYSCALL(__NR_mknod, sys_mknod) -#define __NR_chmod 15 -__SYSCALL(__NR_chmod, sys_chmod) -#define __NR_lchown 16 -__SYSCALL(__NR_lchown, sys_lchown16) - /* 17 was sys_break */ -__SYSCALL(17, sys_ni_syscall) - /* 18 was sys_stat */ -__SYSCALL(18, sys_ni_syscall) -#define __NR_lseek 19 -__SYSCALL(__NR_lseek, compat_sys_lseek) -#define __NR_getpid 20 -__SYSCALL(__NR_getpid, sys_getpid) -#define __NR_mount 21 -__SYSCALL(__NR_mount, sys_mount) - /* 22 was sys_umount */ -__SYSCALL(22, sys_ni_syscall) -#define __NR_setuid 23 -__SYSCALL(__NR_setuid, sys_setuid16) -#define __NR_getuid 24 -__SYSCALL(__NR_getuid, sys_getuid16) - /* 25 was sys_stime */ -__SYSCALL(25, sys_ni_syscall) -#define __NR_ptrace 26 -__SYSCALL(__NR_ptrace, compat_sys_ptrace) - /* 27 was sys_alarm */ -__SYSCALL(27, sys_ni_syscall) - /* 28 was sys_fstat */ -__SYSCALL(28, sys_ni_syscall) -#define __NR_pause 29 -__SYSCALL(__NR_pause, sys_pause) - /* 30 was sys_utime */ -__SYSCALL(30, sys_ni_syscall) - /* 31 was sys_stty */ -__SYSCALL(31, sys_ni_syscall) - /* 32 was sys_gtty */ -__SYSCALL(32, sys_ni_syscall) -#define __NR_access 33 -__SYSCALL(__NR_access, sys_access) -#define __NR_nice 34 -__SYSCALL(__NR_nice, sys_nice) - /* 35 was sys_ftime */ -__SYSCALL(35, sys_ni_syscall) -#define __NR_sync 36 -__SYSCALL(__NR_sync, sys_sync) -#define __NR_kill 37 -__SYSCALL(__NR_kill, sys_kill) -#define __NR_rename 38 -__SYSCALL(__NR_rename, sys_rename) -#define __NR_mkdir 39 -__SYSCALL(__NR_mkdir, sys_mkdir) -#define __NR_rmdir 40 -__SYSCALL(__NR_rmdir, sys_rmdir) -#define __NR_dup 41 -__SYSCALL(__NR_dup, sys_dup) -#define __NR_pipe 42 -__SYSCALL(__NR_pipe, sys_pipe) -#define __NR_times 43 -__SYSCALL(__NR_times, compat_sys_times) - /* 44 was sys_prof */ -__SYSCALL(44, sys_ni_syscall) -#define __NR_brk 45 -__SYSCALL(__NR_brk, sys_brk) -#define __NR_setgid 46 -__SYSCALL(__NR_setgid, sys_setgid16) -#define __NR_getgid 47 -__SYSCALL(__NR_getgid, sys_getgid16) - /* 48 was sys_signal */ -__SYSCALL(48, sys_ni_syscall) -#define __NR_geteuid 49 -__SYSCALL(__NR_geteuid, sys_geteuid16) -#define __NR_getegid 50 -__SYSCALL(__NR_getegid, sys_getegid16) -#define __NR_acct 51 -__SYSCALL(__NR_acct, sys_acct) -#define __NR_umount2 52 -__SYSCALL(__NR_umount2, sys_umount) - /* 53 was sys_lock */ -__SYSCALL(53, sys_ni_syscall) -#define __NR_ioctl 54 -__SYSCALL(__NR_ioctl, compat_sys_ioctl) -#define __NR_fcntl 55 -__SYSCALL(__NR_fcntl, compat_sys_fcntl) - /* 56 was sys_mpx */ -__SYSCALL(56, sys_ni_syscall) -#define __NR_setpgid 57 -__SYSCALL(__NR_setpgid, sys_setpgid) - /* 58 was sys_ulimit */ -__SYSCALL(58, sys_ni_syscall) - /* 59 was sys_olduname */ -__SYSCALL(59, sys_ni_syscall) -#define __NR_umask 60 -__SYSCALL(__NR_umask, sys_umask) -#define __NR_chroot 61 -__SYSCALL(__NR_chroot, sys_chroot) -#define __NR_ustat 62 -__SYSCALL(__NR_ustat, compat_sys_ustat) -#define __NR_dup2 63 -__SYSCALL(__NR_dup2, sys_dup2) -#define __NR_getppid 64 -__SYSCALL(__NR_getppid, sys_getppid) -#define __NR_getpgrp 65 -__SYSCALL(__NR_getpgrp, sys_getpgrp) -#define __NR_setsid 66 -__SYSCALL(__NR_setsid, sys_setsid) -#define __NR_sigaction 67 -__SYSCALL(__NR_sigaction, compat_sys_sigaction) - /* 68 was sys_sgetmask */ -__SYSCALL(68, sys_ni_syscall) - /* 69 was sys_ssetmask */ -__SYSCALL(69, sys_ni_syscall) -#define __NR_setreuid 70 -__SYSCALL(__NR_setreuid, sys_setreuid16) -#define __NR_setregid 71 -__SYSCALL(__NR_setregid, sys_setregid16) -#define __NR_sigsuspend 72 -__SYSCALL(__NR_sigsuspend, sys_sigsuspend) -#define __NR_sigpending 73 -__SYSCALL(__NR_sigpending, compat_sys_sigpending) -#define __NR_sethostname 74 -__SYSCALL(__NR_sethostname, sys_sethostname) -#define __NR_setrlimit 75 -__SYSCALL(__NR_setrlimit, compat_sys_setrlimit) - /* 76 was compat_sys_getrlimit */ -__SYSCALL(76, sys_ni_syscall) -#define __NR_getrusage 77 -__SYSCALL(__NR_getrusage, compat_sys_getrusage) -#define __NR_gettimeofday 78 -__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday) -#define __NR_settimeofday 79 -__SYSCALL(__NR_settimeofday, compat_sys_settimeofday) -#define __NR_getgroups 80 -__SYSCALL(__NR_getgroups, sys_getgroups16) -#define __NR_setgroups 81 -__SYSCALL(__NR_setgroups, sys_setgroups16) - /* 82 was compat_sys_select */ -__SYSCALL(82, sys_ni_syscall) -#define __NR_symlink 83 -__SYSCALL(__NR_symlink, sys_symlink) - /* 84 was sys_lstat */ -__SYSCALL(84, sys_ni_syscall) -#define __NR_readlink 85 -__SYSCALL(__NR_readlink, sys_readlink) -#define __NR_uselib 86 -__SYSCALL(__NR_uselib, sys_uselib) -#define __NR_swapon 87 -__SYSCALL(__NR_swapon, sys_swapon) -#define __NR_reboot 88 -__SYSCALL(__NR_reboot, sys_reboot) - /* 89 was sys_readdir */ -__SYSCALL(89, sys_ni_syscall) - /* 90 was sys_mmap */ -__SYSCALL(90, sys_ni_syscall) -#define __NR_munmap 91 -__SYSCALL(__NR_munmap, sys_munmap) -#define __NR_truncate 92 -__SYSCALL(__NR_truncate, compat_sys_truncate) -#define __NR_ftruncate 93 -__SYSCALL(__NR_ftruncate, compat_sys_ftruncate) -#define __NR_fchmod 94 -__SYSCALL(__NR_fchmod, sys_fchmod) -#define __NR_fchown 95 -__SYSCALL(__NR_fchown, sys_fchown16) -#define __NR_getpriority 96 -__SYSCALL(__NR_getpriority, sys_getpriority) -#define __NR_setpriority 97 -__SYSCALL(__NR_setpriority, sys_setpriority) - /* 98 was sys_profil */ -__SYSCALL(98, sys_ni_syscall) -#define __NR_statfs 99 -__SYSCALL(__NR_statfs, compat_sys_statfs) -#define __NR_fstatfs 100 -__SYSCALL(__NR_fstatfs, compat_sys_fstatfs) - /* 101 was sys_ioperm */ -__SYSCALL(101, sys_ni_syscall) - /* 102 was sys_socketcall */ -__SYSCALL(102, sys_ni_syscall) -#define __NR_syslog 103 -__SYSCALL(__NR_syslog, sys_syslog) -#define __NR_setitimer 104 -__SYSCALL(__NR_setitimer, compat_sys_setitimer) -#define __NR_getitimer 105 -__SYSCALL(__NR_getitimer, compat_sys_getitimer) -#define __NR_stat 106 -__SYSCALL(__NR_stat, compat_sys_newstat) -#define __NR_lstat 107 -__SYSCALL(__NR_lstat, compat_sys_newlstat) -#define __NR_fstat 108 -__SYSCALL(__NR_fstat, compat_sys_newfstat) - /* 109 was sys_uname */ -__SYSCALL(109, sys_ni_syscall) - /* 110 was sys_iopl */ -__SYSCALL(110, sys_ni_syscall) -#define __NR_vhangup 111 -__SYSCALL(__NR_vhangup, sys_vhangup) - /* 112 was sys_idle */ -__SYSCALL(112, sys_ni_syscall) - /* 113 was sys_syscall */ -__SYSCALL(113, sys_ni_syscall) -#define __NR_wait4 114 -__SYSCALL(__NR_wait4, compat_sys_wait4) -#define __NR_swapoff 115 -__SYSCALL(__NR_swapoff, sys_swapoff) -#define __NR_sysinfo 116 -__SYSCALL(__NR_sysinfo, compat_sys_sysinfo) - /* 117 was sys_ipc */ -__SYSCALL(117, sys_ni_syscall) -#define __NR_fsync 118 -__SYSCALL(__NR_fsync, sys_fsync) -#define __NR_sigreturn 119 -__SYSCALL(__NR_sigreturn, compat_sys_sigreturn) -#define __NR_clone 120 -__SYSCALL(__NR_clone, sys_clone) -#define __NR_setdomainname 121 -__SYSCALL(__NR_setdomainname, sys_setdomainname) -#define __NR_uname 122 -__SYSCALL(__NR_uname, sys_newuname) - /* 123 was sys_modify_ldt */ -__SYSCALL(123, sys_ni_syscall) -#define __NR_adjtimex 124 -__SYSCALL(__NR_adjtimex, sys_adjtimex_time32) -#define __NR_mprotect 125 -__SYSCALL(__NR_mprotect, sys_mprotect) -#define __NR_sigprocmask 126 -__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask) - /* 127 was sys_create_module */ -__SYSCALL(127, sys_ni_syscall) -#define __NR_init_module 128 -__SYSCALL(__NR_init_module, sys_init_module) -#define __NR_delete_module 129 -__SYSCALL(__NR_delete_module, sys_delete_module) - /* 130 was sys_get_kernel_syms */ -__SYSCALL(130, sys_ni_syscall) -#define __NR_quotactl 131 -__SYSCALL(__NR_quotactl, sys_quotactl) -#define __NR_getpgid 132 -__SYSCALL(__NR_getpgid, sys_getpgid) -#define __NR_fchdir 133 -__SYSCALL(__NR_fchdir, sys_fchdir) -#define __NR_bdflush 134 -__SYSCALL(__NR_bdflush, sys_ni_syscall) -#define __NR_sysfs 135 -__SYSCALL(__NR_sysfs, sys_sysfs) -#define __NR_personality 136 -__SYSCALL(__NR_personality, sys_personality) - /* 137 was sys_afs_syscall */ -__SYSCALL(137, sys_ni_syscall) -#define __NR_setfsuid 138 -__SYSCALL(__NR_setfsuid, sys_setfsuid16) -#define __NR_setfsgid 139 -__SYSCALL(__NR_setfsgid, sys_setfsgid16) -#define __NR__llseek 140 -__SYSCALL(__NR__llseek, sys_llseek) -#define __NR_getdents 141 -__SYSCALL(__NR_getdents, compat_sys_getdents) -#define __NR__newselect 142 -__SYSCALL(__NR__newselect, compat_sys_select) -#define __NR_flock 143 -__SYSCALL(__NR_flock, sys_flock) -#define __NR_msync 144 -__SYSCALL(__NR_msync, sys_msync) -#define __NR_readv 145 -__SYSCALL(__NR_readv, sys_readv) -#define __NR_writev 146 -__SYSCALL(__NR_writev, sys_writev) -#define __NR_getsid 147 -__SYSCALL(__NR_getsid, sys_getsid) -#define __NR_fdatasync 148 -__SYSCALL(__NR_fdatasync, sys_fdatasync) - /* 149 was sys_sysctl */ -__SYSCALL(149, sys_ni_syscall) -#define __NR_mlock 150 -__SYSCALL(__NR_mlock, sys_mlock) -#define __NR_munlock 151 -__SYSCALL(__NR_munlock, sys_munlock) -#define __NR_mlockall 152 -__SYSCALL(__NR_mlockall, sys_mlockall) -#define __NR_munlockall 153 -__SYSCALL(__NR_munlockall, sys_munlockall) -#define __NR_sched_setparam 154 -__SYSCALL(__NR_sched_setparam, sys_sched_setparam) -#define __NR_sched_getparam 155 -__SYSCALL(__NR_sched_getparam, sys_sched_getparam) -#define __NR_sched_setscheduler 156 -__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) -#define __NR_sched_getscheduler 157 -__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) -#define __NR_sched_yield 158 -__SYSCALL(__NR_sched_yield, sys_sched_yield) -#define __NR_sched_get_priority_max 159 -__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) -#define __NR_sched_get_priority_min 160 -__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) -#define __NR_sched_rr_get_interval 161 -__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32) -#define __NR_nanosleep 162 -__SYSCALL(__NR_nanosleep, sys_nanosleep_time32) -#define __NR_mremap 163 -__SYSCALL(__NR_mremap, sys_mremap) -#define __NR_setresuid 164 -__SYSCALL(__NR_setresuid, sys_setresuid16) -#define __NR_getresuid 165 -__SYSCALL(__NR_getresuid, sys_getresuid16) - /* 166 was sys_vm86 */ -__SYSCALL(166, sys_ni_syscall) - /* 167 was sys_query_module */ -__SYSCALL(167, sys_ni_syscall) -#define __NR_poll 168 -__SYSCALL(__NR_poll, sys_poll) -#define __NR_nfsservctl 169 -__SYSCALL(__NR_nfsservctl, sys_ni_syscall) -#define __NR_setresgid 170 -__SYSCALL(__NR_setresgid, sys_setresgid16) -#define __NR_getresgid 171 -__SYSCALL(__NR_getresgid, sys_getresgid16) -#define __NR_prctl 172 -__SYSCALL(__NR_prctl, sys_prctl) -#define __NR_rt_sigreturn 173 -__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn) -#define __NR_rt_sigaction 174 -__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction) -#define __NR_rt_sigprocmask 175 -__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) -#define __NR_rt_sigpending 176 -__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) -#define __NR_rt_sigtimedwait 177 -__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) -#define __NR_rt_sigqueueinfo 178 -__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) -#define __NR_rt_sigsuspend 179 -__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend) -#define __NR_pread64 180 -__SYSCALL(__NR_pread64, compat_sys_aarch32_pread64) -#define __NR_pwrite64 181 -__SYSCALL(__NR_pwrite64, compat_sys_aarch32_pwrite64) -#define __NR_chown 182 -__SYSCALL(__NR_chown, sys_chown16) -#define __NR_getcwd 183 -__SYSCALL(__NR_getcwd, sys_getcwd) -#define __NR_capget 184 -__SYSCALL(__NR_capget, sys_capget) -#define __NR_capset 185 -__SYSCALL(__NR_capset, sys_capset) -#define __NR_sigaltstack 186 -__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack) -#define __NR_sendfile 187 -__SYSCALL(__NR_sendfile, compat_sys_sendfile) - /* 188 reserved */ -__SYSCALL(188, sys_ni_syscall) - /* 189 reserved */ -__SYSCALL(189, sys_ni_syscall) -#define __NR_vfork 190 -__SYSCALL(__NR_vfork, sys_vfork) -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -__SYSCALL(__NR_mmap2, compat_sys_aarch32_mmap2) -#define __NR_truncate64 193 -__SYSCALL(__NR_truncate64, compat_sys_aarch32_truncate64) -#define __NR_ftruncate64 194 -__SYSCALL(__NR_ftruncate64, compat_sys_aarch32_ftruncate64) -#define __NR_stat64 195 -__SYSCALL(__NR_stat64, sys_stat64) -#define __NR_lstat64 196 -__SYSCALL(__NR_lstat64, sys_lstat64) -#define __NR_fstat64 197 -__SYSCALL(__NR_fstat64, sys_fstat64) -#define __NR_lchown32 198 -__SYSCALL(__NR_lchown32, sys_lchown) -#define __NR_getuid32 199 -__SYSCALL(__NR_getuid32, sys_getuid) -#define __NR_getgid32 200 -__SYSCALL(__NR_getgid32, sys_getgid) -#define __NR_geteuid32 201 -__SYSCALL(__NR_geteuid32, sys_geteuid) -#define __NR_getegid32 202 -__SYSCALL(__NR_getegid32, sys_getegid) -#define __NR_setreuid32 203 -__SYSCALL(__NR_setreuid32, sys_setreuid) -#define __NR_setregid32 204 -__SYSCALL(__NR_setregid32, sys_setregid) -#define __NR_getgroups32 205 -__SYSCALL(__NR_getgroups32, sys_getgroups) -#define __NR_setgroups32 206 -__SYSCALL(__NR_setgroups32, sys_setgroups) -#define __NR_fchown32 207 -__SYSCALL(__NR_fchown32, sys_fchown) -#define __NR_setresuid32 208 -__SYSCALL(__NR_setresuid32, sys_setresuid) -#define __NR_getresuid32 209 -__SYSCALL(__NR_getresuid32, sys_getresuid) -#define __NR_setresgid32 210 -__SYSCALL(__NR_setresgid32, sys_setresgid) -#define __NR_getresgid32 211 -__SYSCALL(__NR_getresgid32, sys_getresgid) -#define __NR_chown32 212 -__SYSCALL(__NR_chown32, sys_chown) -#define __NR_setuid32 213 -__SYSCALL(__NR_setuid32, sys_setuid) -#define __NR_setgid32 214 -__SYSCALL(__NR_setgid32, sys_setgid) -#define __NR_setfsuid32 215 -__SYSCALL(__NR_setfsuid32, sys_setfsuid) -#define __NR_setfsgid32 216 -__SYSCALL(__NR_setfsgid32, sys_setfsgid) -#define __NR_getdents64 217 -__SYSCALL(__NR_getdents64, sys_getdents64) -#define __NR_pivot_root 218 -__SYSCALL(__NR_pivot_root, sys_pivot_root) -#define __NR_mincore 219 -__SYSCALL(__NR_mincore, sys_mincore) -#define __NR_madvise 220 -__SYSCALL(__NR_madvise, sys_madvise) -#define __NR_fcntl64 221 -__SYSCALL(__NR_fcntl64, compat_sys_fcntl64) - /* 222 for tux */ -__SYSCALL(222, sys_ni_syscall) - /* 223 is unused */ -__SYSCALL(223, sys_ni_syscall) -#define __NR_gettid 224 -__SYSCALL(__NR_gettid, sys_gettid) -#define __NR_readahead 225 -__SYSCALL(__NR_readahead, compat_sys_aarch32_readahead) -#define __NR_setxattr 226 -__SYSCALL(__NR_setxattr, sys_setxattr) -#define __NR_lsetxattr 227 -__SYSCALL(__NR_lsetxattr, sys_lsetxattr) -#define __NR_fsetxattr 228 -__SYSCALL(__NR_fsetxattr, sys_fsetxattr) -#define __NR_getxattr 229 -__SYSCALL(__NR_getxattr, sys_getxattr) -#define __NR_lgetxattr 230 -__SYSCALL(__NR_lgetxattr, sys_lgetxattr) -#define __NR_fgetxattr 231 -__SYSCALL(__NR_fgetxattr, sys_fgetxattr) -#define __NR_listxattr 232 -__SYSCALL(__NR_listxattr, sys_listxattr) -#define __NR_llistxattr 233 -__SYSCALL(__NR_llistxattr, sys_llistxattr) -#define __NR_flistxattr 234 -__SYSCALL(__NR_flistxattr, sys_flistxattr) -#define __NR_removexattr 235 -__SYSCALL(__NR_removexattr, sys_removexattr) -#define __NR_lremovexattr 236 -__SYSCALL(__NR_lremovexattr, sys_lremovexattr) -#define __NR_fremovexattr 237 -__SYSCALL(__NR_fremovexattr, sys_fremovexattr) -#define __NR_tkill 238 -__SYSCALL(__NR_tkill, sys_tkill) -#define __NR_sendfile64 239 -__SYSCALL(__NR_sendfile64, sys_sendfile64) -#define __NR_futex 240 -__SYSCALL(__NR_futex, sys_futex_time32) -#define __NR_sched_setaffinity 241 -__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) -#define __NR_sched_getaffinity 242 -__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity) -#define __NR_io_setup 243 -__SYSCALL(__NR_io_setup, compat_sys_io_setup) -#define __NR_io_destroy 244 -__SYSCALL(__NR_io_destroy, sys_io_destroy) -#define __NR_io_getevents 245 -__SYSCALL(__NR_io_getevents, sys_io_getevents_time32) -#define __NR_io_submit 246 -__SYSCALL(__NR_io_submit, compat_sys_io_submit) -#define __NR_io_cancel 247 -__SYSCALL(__NR_io_cancel, sys_io_cancel) -#define __NR_exit_group 248 -__SYSCALL(__NR_exit_group, sys_exit_group) - /* 249 was lookup_dcookie */ -__SYSCALL(249, sys_ni_syscall) -#define __NR_epoll_create 250 -__SYSCALL(__NR_epoll_create, sys_epoll_create) -#define __NR_epoll_ctl 251 -__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) -#define __NR_epoll_wait 252 -__SYSCALL(__NR_epoll_wait, sys_epoll_wait) -#define __NR_remap_file_pages 253 -__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) - /* 254 for set_thread_area */ -__SYSCALL(254, sys_ni_syscall) - /* 255 for get_thread_area */ -__SYSCALL(255, sys_ni_syscall) -#define __NR_set_tid_address 256 -__SYSCALL(__NR_set_tid_address, sys_set_tid_address) -#define __NR_timer_create 257 -__SYSCALL(__NR_timer_create, compat_sys_timer_create) -#define __NR_timer_settime 258 -__SYSCALL(__NR_timer_settime, sys_timer_settime32) -#define __NR_timer_gettime 259 -__SYSCALL(__NR_timer_gettime, sys_timer_gettime32) -#define __NR_timer_getoverrun 260 -__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) -#define __NR_timer_delete 261 -__SYSCALL(__NR_timer_delete, sys_timer_delete) -#define __NR_clock_settime 262 -__SYSCALL(__NR_clock_settime, sys_clock_settime32) -#define __NR_clock_gettime 263 -__SYSCALL(__NR_clock_gettime, sys_clock_gettime32) -#define __NR_clock_getres 264 -__SYSCALL(__NR_clock_getres, sys_clock_getres_time32) -#define __NR_clock_nanosleep 265 -__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32) -#define __NR_statfs64 266 -__SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64) -#define __NR_fstatfs64 267 -__SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64) -#define __NR_tgkill 268 -__SYSCALL(__NR_tgkill, sys_tgkill) -#define __NR_utimes 269 -__SYSCALL(__NR_utimes, sys_utimes_time32) -#define __NR_arm_fadvise64_64 270 -__SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64) -#define __NR_pciconfig_iobase 271 -__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) -#define __NR_pciconfig_read 272 -__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read) -#define __NR_pciconfig_write 273 -__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write) -#define __NR_mq_open 274 -__SYSCALL(__NR_mq_open, compat_sys_mq_open) -#define __NR_mq_unlink 275 -__SYSCALL(__NR_mq_unlink, sys_mq_unlink) -#define __NR_mq_timedsend 276 -__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32) -#define __NR_mq_timedreceive 277 -__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32) -#define __NR_mq_notify 278 -__SYSCALL(__NR_mq_notify, compat_sys_mq_notify) -#define __NR_mq_getsetattr 279 -__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr) -#define __NR_waitid 280 -__SYSCALL(__NR_waitid, compat_sys_waitid) -#define __NR_socket 281 -__SYSCALL(__NR_socket, sys_socket) -#define __NR_bind 282 -__SYSCALL(__NR_bind, sys_bind) -#define __NR_connect 283 -__SYSCALL(__NR_connect, sys_connect) -#define __NR_listen 284 -__SYSCALL(__NR_listen, sys_listen) -#define __NR_accept 285 -__SYSCALL(__NR_accept, sys_accept) -#define __NR_getsockname 286 -__SYSCALL(__NR_getsockname, sys_getsockname) -#define __NR_getpeername 287 -__SYSCALL(__NR_getpeername, sys_getpeername) -#define __NR_socketpair 288 -__SYSCALL(__NR_socketpair, sys_socketpair) -#define __NR_send 289 -__SYSCALL(__NR_send, sys_send) -#define __NR_sendto 290 -__SYSCALL(__NR_sendto, sys_sendto) -#define __NR_recv 291 -__SYSCALL(__NR_recv, compat_sys_recv) -#define __NR_recvfrom 292 -__SYSCALL(__NR_recvfrom, compat_sys_recvfrom) -#define __NR_shutdown 293 -__SYSCALL(__NR_shutdown, sys_shutdown) -#define __NR_setsockopt 294 -__SYSCALL(__NR_setsockopt, sys_setsockopt) -#define __NR_getsockopt 295 -__SYSCALL(__NR_getsockopt, sys_getsockopt) -#define __NR_sendmsg 296 -__SYSCALL(__NR_sendmsg, compat_sys_sendmsg) -#define __NR_recvmsg 297 -__SYSCALL(__NR_recvmsg, compat_sys_recvmsg) -#define __NR_semop 298 -__SYSCALL(__NR_semop, sys_semop) -#define __NR_semget 299 -__SYSCALL(__NR_semget, sys_semget) -#define __NR_semctl 300 -__SYSCALL(__NR_semctl, compat_sys_old_semctl) -#define __NR_msgsnd 301 -__SYSCALL(__NR_msgsnd, compat_sys_msgsnd) -#define __NR_msgrcv 302 -__SYSCALL(__NR_msgrcv, compat_sys_msgrcv) -#define __NR_msgget 303 -__SYSCALL(__NR_msgget, sys_msgget) -#define __NR_msgctl 304 -__SYSCALL(__NR_msgctl, compat_sys_old_msgctl) -#define __NR_shmat 305 -__SYSCALL(__NR_shmat, compat_sys_shmat) -#define __NR_shmdt 306 -__SYSCALL(__NR_shmdt, sys_shmdt) -#define __NR_shmget 307 -__SYSCALL(__NR_shmget, sys_shmget) -#define __NR_shmctl 308 -__SYSCALL(__NR_shmctl, compat_sys_old_shmctl) -#define __NR_add_key 309 -__SYSCALL(__NR_add_key, sys_add_key) -#define __NR_request_key 310 -__SYSCALL(__NR_request_key, sys_request_key) -#define __NR_keyctl 311 -__SYSCALL(__NR_keyctl, compat_sys_keyctl) -#define __NR_semtimedop 312 -__SYSCALL(__NR_semtimedop, sys_semtimedop_time32) -#define __NR_vserver 313 -__SYSCALL(__NR_vserver, sys_ni_syscall) -#define __NR_ioprio_set 314 -__SYSCALL(__NR_ioprio_set, sys_ioprio_set) -#define __NR_ioprio_get 315 -__SYSCALL(__NR_ioprio_get, sys_ioprio_get) -#define __NR_inotify_init 316 -__SYSCALL(__NR_inotify_init, sys_inotify_init) -#define __NR_inotify_add_watch 317 -__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) -#define __NR_inotify_rm_watch 318 -__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) -#define __NR_mbind 319 -__SYSCALL(__NR_mbind, sys_mbind) -#define __NR_get_mempolicy 320 -__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) -#define __NR_set_mempolicy 321 -__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) -#define __NR_openat 322 -__SYSCALL(__NR_openat, compat_sys_openat) -#define __NR_mkdirat 323 -__SYSCALL(__NR_mkdirat, sys_mkdirat) -#define __NR_mknodat 324 -__SYSCALL(__NR_mknodat, sys_mknodat) -#define __NR_fchownat 325 -__SYSCALL(__NR_fchownat, sys_fchownat) -#define __NR_futimesat 326 -__SYSCALL(__NR_futimesat, sys_futimesat_time32) -#define __NR_fstatat64 327 -__SYSCALL(__NR_fstatat64, sys_fstatat64) -#define __NR_unlinkat 328 -__SYSCALL(__NR_unlinkat, sys_unlinkat) -#define __NR_renameat 329 -__SYSCALL(__NR_renameat, sys_renameat) -#define __NR_linkat 330 -__SYSCALL(__NR_linkat, sys_linkat) -#define __NR_symlinkat 331 -__SYSCALL(__NR_symlinkat, sys_symlinkat) -#define __NR_readlinkat 332 -__SYSCALL(__NR_readlinkat, sys_readlinkat) -#define __NR_fchmodat 333 -__SYSCALL(__NR_fchmodat, sys_fchmodat) -#define __NR_faccessat 334 -__SYSCALL(__NR_faccessat, sys_faccessat) -#define __NR_pselect6 335 -__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32) -#define __NR_ppoll 336 -__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32) -#define __NR_unshare 337 -__SYSCALL(__NR_unshare, sys_unshare) -#define __NR_set_robust_list 338 -__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list) -#define __NR_get_robust_list 339 -__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list) -#define __NR_splice 340 -__SYSCALL(__NR_splice, sys_splice) -#define __NR_sync_file_range2 341 -__SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2) -#define __NR_tee 342 -__SYSCALL(__NR_tee, sys_tee) -#define __NR_vmsplice 343 -__SYSCALL(__NR_vmsplice, sys_vmsplice) -#define __NR_move_pages 344 -__SYSCALL(__NR_move_pages, sys_move_pages) -#define __NR_getcpu 345 -__SYSCALL(__NR_getcpu, sys_getcpu) -#define __NR_epoll_pwait 346 -__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) -#define __NR_kexec_load 347 -__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) -#define __NR_utimensat 348 -__SYSCALL(__NR_utimensat, sys_utimensat_time32) -#define __NR_signalfd 349 -__SYSCALL(__NR_signalfd, compat_sys_signalfd) -#define __NR_timerfd_create 350 -__SYSCALL(__NR_timerfd_create, sys_timerfd_create) -#define __NR_eventfd 351 -__SYSCALL(__NR_eventfd, sys_eventfd) -#define __NR_fallocate 352 -__SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate) -#define __NR_timerfd_settime 353 -__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32) -#define __NR_timerfd_gettime 354 -__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32) -#define __NR_signalfd4 355 -__SYSCALL(__NR_signalfd4, compat_sys_signalfd4) -#define __NR_eventfd2 356 -__SYSCALL(__NR_eventfd2, sys_eventfd2) -#define __NR_epoll_create1 357 -__SYSCALL(__NR_epoll_create1, sys_epoll_create1) -#define __NR_dup3 358 -__SYSCALL(__NR_dup3, sys_dup3) -#define __NR_pipe2 359 -__SYSCALL(__NR_pipe2, sys_pipe2) -#define __NR_inotify_init1 360 -__SYSCALL(__NR_inotify_init1, sys_inotify_init1) -#define __NR_preadv 361 -__SYSCALL(__NR_preadv, compat_sys_preadv) -#define __NR_pwritev 362 -__SYSCALL(__NR_pwritev, compat_sys_pwritev) -#define __NR_rt_tgsigqueueinfo 363 -__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) -#define __NR_perf_event_open 364 -__SYSCALL(__NR_perf_event_open, sys_perf_event_open) -#define __NR_recvmmsg 365 -__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32) -#define __NR_accept4 366 -__SYSCALL(__NR_accept4, sys_accept4) -#define __NR_fanotify_init 367 -__SYSCALL(__NR_fanotify_init, sys_fanotify_init) -#define __NR_fanotify_mark 368 -__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark) -#define __NR_prlimit64 369 -__SYSCALL(__NR_prlimit64, sys_prlimit64) -#define __NR_name_to_handle_at 370 -__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) -#define __NR_open_by_handle_at 371 -__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) -#define __NR_clock_adjtime 372 -__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32) -#define __NR_syncfs 373 -__SYSCALL(__NR_syncfs, sys_syncfs) -#define __NR_sendmmsg 374 -__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) -#define __NR_setns 375 -__SYSCALL(__NR_setns, sys_setns) -#define __NR_process_vm_readv 376 -__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) -#define __NR_process_vm_writev 377 -__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) -#define __NR_kcmp 378 -__SYSCALL(__NR_kcmp, sys_kcmp) -#define __NR_finit_module 379 -__SYSCALL(__NR_finit_module, sys_finit_module) -#define __NR_sched_setattr 380 -__SYSCALL(__NR_sched_setattr, sys_sched_setattr) -#define __NR_sched_getattr 381 -__SYSCALL(__NR_sched_getattr, sys_sched_getattr) -#define __NR_renameat2 382 -__SYSCALL(__NR_renameat2, sys_renameat2) -#define __NR_seccomp 383 -__SYSCALL(__NR_seccomp, sys_seccomp) -#define __NR_getrandom 384 -__SYSCALL(__NR_getrandom, sys_getrandom) -#define __NR_memfd_create 385 -__SYSCALL(__NR_memfd_create, sys_memfd_create) -#define __NR_bpf 386 -__SYSCALL(__NR_bpf, sys_bpf) -#define __NR_execveat 387 -__SYSCALL(__NR_execveat, compat_sys_execveat) -#define __NR_userfaultfd 388 -__SYSCALL(__NR_userfaultfd, sys_userfaultfd) -#define __NR_membarrier 389 -__SYSCALL(__NR_membarrier, sys_membarrier) -#define __NR_mlock2 390 -__SYSCALL(__NR_mlock2, sys_mlock2) -#define __NR_copy_file_range 391 -__SYSCALL(__NR_copy_file_range, sys_copy_file_range) -#define __NR_preadv2 392 -__SYSCALL(__NR_preadv2, compat_sys_preadv2) -#define __NR_pwritev2 393 -__SYSCALL(__NR_pwritev2, compat_sys_pwritev2) -#define __NR_pkey_mprotect 394 -__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) -#define __NR_pkey_alloc 395 -__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) -#define __NR_pkey_free 396 -__SYSCALL(__NR_pkey_free, sys_pkey_free) -#define __NR_statx 397 -__SYSCALL(__NR_statx, sys_statx) -#define __NR_rseq 398 -__SYSCALL(__NR_rseq, sys_rseq) -#define __NR_io_pgetevents 399 -__SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) -#define __NR_migrate_pages 400 -__SYSCALL(__NR_migrate_pages, sys_migrate_pages) -#define __NR_kexec_file_load 401 -__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) -/* 402 is unused */ -#define __NR_clock_gettime64 403 -__SYSCALL(__NR_clock_gettime64, sys_clock_gettime) -#define __NR_clock_settime64 404 -__SYSCALL(__NR_clock_settime64, sys_clock_settime) -#define __NR_clock_adjtime64 405 -__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime) -#define __NR_clock_getres_time64 406 -__SYSCALL(__NR_clock_getres_time64, sys_clock_getres) -#define __NR_clock_nanosleep_time64 407 -__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep) -#define __NR_timer_gettime64 408 -__SYSCALL(__NR_timer_gettime64, sys_timer_gettime) -#define __NR_timer_settime64 409 -__SYSCALL(__NR_timer_settime64, sys_timer_settime) -#define __NR_timerfd_gettime64 410 -__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime) -#define __NR_timerfd_settime64 411 -__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime) -#define __NR_utimensat_time64 412 -__SYSCALL(__NR_utimensat_time64, sys_utimensat) -#define __NR_pselect6_time64 413 -__SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64) -#define __NR_ppoll_time64 414 -__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64) -#define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) -#define __NR_recvmmsg_time64 417 -__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64) -#define __NR_mq_timedsend_time64 418 -__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend) -#define __NR_mq_timedreceive_time64 419 -__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive) -#define __NR_semtimedop_time64 420 -__SYSCALL(__NR_semtimedop_time64, sys_semtimedop) -#define __NR_rt_sigtimedwait_time64 421 -__SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) -#define __NR_futex_time64 422 -__SYSCALL(__NR_futex_time64, sys_futex) -#define __NR_sched_rr_get_interval_time64 423 -__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) -#define __NR_pidfd_send_signal 424 -__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) -#define __NR_io_uring_setup 425 -__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) -#define __NR_io_uring_enter 426 -__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) -#define __NR_io_uring_register 427 -__SYSCALL(__NR_io_uring_register, sys_io_uring_register) -#define __NR_open_tree 428 -__SYSCALL(__NR_open_tree, sys_open_tree) -#define __NR_move_mount 429 -__SYSCALL(__NR_move_mount, sys_move_mount) -#define __NR_fsopen 430 -__SYSCALL(__NR_fsopen, sys_fsopen) -#define __NR_fsconfig 431 -__SYSCALL(__NR_fsconfig, sys_fsconfig) -#define __NR_fsmount 432 -__SYSCALL(__NR_fsmount, sys_fsmount) -#define __NR_fspick 433 -__SYSCALL(__NR_fspick, sys_fspick) -#define __NR_pidfd_open 434 -__SYSCALL(__NR_pidfd_open, sys_pidfd_open) -#define __NR_clone3 435 -__SYSCALL(__NR_clone3, sys_clone3) -#define __NR_close_range 436 -__SYSCALL(__NR_close_range, sys_close_range) -#define __NR_openat2 437 -__SYSCALL(__NR_openat2, sys_openat2) -#define __NR_pidfd_getfd 438 -__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) -#define __NR_faccessat2 439 -__SYSCALL(__NR_faccessat2, sys_faccessat2) -#define __NR_process_madvise 440 -__SYSCALL(__NR_process_madvise, sys_process_madvise) -#define __NR_epoll_pwait2 441 -__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) -#define __NR_mount_setattr 442 -__SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_fd 443 -__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) -#define __NR_landlock_create_ruleset 444 -__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) -#define __NR_landlock_add_rule 445 -__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) -#define __NR_landlock_restrict_self 446 -__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) -#define __NR_process_mrelease 448 -__SYSCALL(__NR_process_mrelease, sys_process_mrelease) -#define __NR_futex_waitv 449 -__SYSCALL(__NR_futex_waitv, sys_futex_waitv) -#define __NR_set_mempolicy_home_node 450 -__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) -#define __NR_cachestat 451 -__SYSCALL(__NR_cachestat, sys_cachestat) -#define __NR_fchmodat2 452 -__SYSCALL(__NR_fchmodat2, sys_fchmodat2) -#define __NR_map_shadow_stack 453 -__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) -#define __NR_futex_wake 454 -__SYSCALL(__NR_futex_wake, sys_futex_wake) -#define __NR_futex_wait 455 -__SYSCALL(__NR_futex_wait, sys_futex_wait) -#define __NR_futex_requeue 456 -__SYSCALL(__NR_futex_requeue, sys_futex_requeue) -#define __NR_statmount 457 -__SYSCALL(__NR_statmount, sys_statmount) -#define __NR_listmount 458 -__SYSCALL(__NR_listmount, sys_listmount) -#define __NR_lsm_get_self_attr 459 -__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) -#define __NR_lsm_set_self_attr 460 -__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) -#define __NR_lsm_list_modules 461 -__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) +#define __NR_sync_file_range2 __NR_arm_sync_file_range -/* - * Please add new compat syscalls above this comment and update - * __NR_compat_syscalls in asm/unistd.h. - */ +#endif /* _UAPI__ASM_ARM_UNISTD_H */ diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 4305995c8f82..3e3c3fdb1842 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -5,13 +5,6 @@ #ifndef __ASM_VDSO_H #define __ASM_VDSO_H -/* - * Default link address for the vDSO. - * Since we randomise the VDSO mapping, there's little point in trying - * to prelink this. - */ -#define VDSO_LBASE 0x0 - #define __VVAR_PAGES 2 #ifndef __ASSEMBLY__ @@ -20,7 +13,7 @@ #define VDSO_SYMBOL(base, name) \ ({ \ - (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ + (void *)(vdso_offset_##name + (unsigned long)(base)); \ }) extern char vdso_start[], vdso_end[]; diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index ecb6fd4c3c64..778c1202bbbf 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -8,7 +8,7 @@ #ifndef __ASSEMBLY__ #include <asm/barrier.h> -#include <asm/unistd.h> +#include <asm/unistd_compat_32.h> #include <asm/errno.h> #include <asm/vdso/compat_barrier.h> @@ -24,7 +24,7 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, register struct timezone *tz asm("r1") = _tz; register struct __kernel_old_timeval *tv asm("r0") = _tv; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_gettimeofday; + register long nr asm("r7") = __NR_compat32_gettimeofday; asm volatile( " swi #0\n" @@ -41,7 +41,7 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) register struct __kernel_timespec *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_gettime64; + register long nr asm("r7") = __NR_compat32_clock_gettime64; asm volatile( " swi #0\n" @@ -58,7 +58,7 @@ long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) register struct old_timespec32 *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_gettime; + register long nr asm("r7") = __NR_compat32_clock_gettime; asm volatile( " swi #0\n" @@ -75,7 +75,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) register struct __kernel_timespec *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_getres_time64; + register long nr asm("r7") = __NR_compat32_clock_getres_time64; asm volatile( " swi #0\n" @@ -92,7 +92,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) register struct old_timespec32 *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_getres; + register long nr asm("r7") = __NR_compat32_clock_getres; asm volatile( " swi #0\n" diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..342f807e2044 --- /dev/null +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <asm/vdso/vsyscall.h> +#include <vdso/datapage.h> + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register void *buffer asm ("x0") = _buffer; + register size_t len asm ("x1") = _len; + register unsigned int flags asm ("x2") = _flags; + register long ret asm ("x0"); + register long nr asm ("x8") = __NR_getrandom; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (buffer), "r" (len), "r" (flags), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + /* + * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace + * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ + * PAGE_OFFSET points to the real VVAR page. + */ + if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) + return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT); + return &_vdso_rng_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index f94b1457c117..eea51946d45a 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,11 +2,18 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H +#define __VDSO_RND_DATA_OFFSET 480 + #ifndef __ASSEMBLY__ -#include <linux/timekeeper_internal.h> #include <vdso/datapage.h> +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + #define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) extern struct vdso_data *vdso_data; @@ -22,7 +29,14 @@ struct vdso_data *__arm64_get_k_vdso_data(void) #define __arch_get_k_vdso_data __arm64_get_k_vdso_data static __always_inline -void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) +{ + return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; +} +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data + +static __always_inline +void __arm64_update_vsyscall(struct vdso_data *vdata) { vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; vdata[CS_RAW].mask = VDSO_PRECISION_MASK; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 261d6e9df2e1..ebf4a9f943ed 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -82,6 +82,12 @@ bool is_kvm_arm_initialised(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +static inline bool is_pkvm_initialized(void) +{ + return IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { @@ -89,8 +95,7 @@ static inline bool is_hyp_mode_available(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return true; return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && @@ -104,8 +109,7 @@ static inline bool is_hyp_mode_mismatched(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return false; return __boot_cpu_mode[0] != __boot_cpu_mode[1]; diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index df2c47c55972..4f9bbd4d6c26 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -50,8 +50,8 @@ #define VNCR_VBAR_EL1 0x250 #define VNCR_TCR2_EL1 0x270 #define VNCR_PIRE0_EL1 0x290 -#define VNCR_PIRE0_EL2 0x298 #define VNCR_PIR_EL1 0x2A0 +#define VNCR_POR_EL1 0x2A8 #define VNCR_ICH_LR0_EL2 0x400 #define VNCR_ICH_LR1_EL2 0x408 #define VNCR_ICH_LR2_EL2 0x410 diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index f3b151ed0d7a..824ca6987a51 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -9,7 +9,8 @@ #ifndef __AARCH64EB__ -#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/wordpart.h> struct word_at_a_time { const unsigned long one_bits, high_bits; @@ -26,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, } #define prep_zero_mask(a, bits, c) (bits) +#define create_zero_mask(bits) (bits) +#define find_zero(bits) (__ffs(bits) >> 3) -static inline unsigned long create_zero_mask(unsigned long bits) +static inline unsigned long zero_bytemask(unsigned long bits) { bits = (bits - 1) & ~bits; return bits >> 7; } -static inline unsigned long find_zero(unsigned long mask) -{ - return fls64(mask) >> 3; -} - -#define zero_bytemask(mask) (mask) - #else /* __AARCH64EB__ */ #include <asm-generic/word-at-a-time.h> #endif diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index 602d137932dc..c6d141d7b7d7 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 +syscall-y += unistd_64.h generic-y += kvm_para.h diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 5023599fa278..705a7afa8e58 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -21,7 +21,7 @@ * HWCAP flags - for AT_HWCAP * * Bits 62 and 63 are reserved for use by libc. - * Bits 32-61 are unallocated for potential use by libc. + * Bits 33-61 are unallocated for potential use by libc. */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) @@ -55,6 +55,22 @@ #define HWCAP_SB (1 << 29) #define HWCAP_PACA (1 << 30) #define HWCAP_PACG (1UL << 31) +#define HWCAP_GCS (1UL << 32) +#define HWCAP_CMPBR (1UL << 33) +#define HWCAP_FPRCVT (1UL << 34) +#define HWCAP_F8MM8 (1UL << 35) +#define HWCAP_F8MM4 (1UL << 36) +#define HWCAP_SVE_F16MM (1UL << 37) +#define HWCAP_SVE_ELTPERM (1UL << 38) +#define HWCAP_SVE_AES2 (1UL << 39) +#define HWCAP_SVE_BFSCALE (1UL << 40) +#define HWCAP_SVE2P2 (1UL << 41) +#define HWCAP_SME2P2 (1UL << 42) +#define HWCAP_SME_SBITPERM (1UL << 43) +#define HWCAP_SME_AES (1UL << 44) +#define HWCAP_SME_SFEXPA (1UL << 45) +#define HWCAP_SME_STMOP (1UL << 46) +#define HWCAP_SME_SMOP4 (1UL << 47) /* * HWCAP2 flags - for AT_HWCAP2 @@ -107,5 +123,25 @@ #define HWCAP2_SVE_B16B16 (1UL << 45) #define HWCAP2_LRCPC3 (1UL << 46) #define HWCAP2_LSE128 (1UL << 47) +#define HWCAP2_FPMR (1UL << 48) +#define HWCAP2_LUT (1UL << 49) +#define HWCAP2_FAMINMAX (1UL << 50) +#define HWCAP2_F8CVT (1UL << 51) +#define HWCAP2_F8FMA (1UL << 52) +#define HWCAP2_F8DP4 (1UL << 53) +#define HWCAP2_F8DP2 (1UL << 54) +#define HWCAP2_F8E4M3 (1UL << 55) +#define HWCAP2_F8E5M2 (1UL << 56) +#define HWCAP2_SME_LUTV2 (1UL << 57) +#define HWCAP2_SME_F8F16 (1UL << 58) +#define HWCAP2_SME_F8F32 (1UL << 59) +#define HWCAP2_SME_SF8FMA (1UL << 60) +#define HWCAP2_SME_SF8DP4 (1UL << 61) +#define HWCAP2_SME_SF8DP2 (1UL << 62) +#define HWCAP2_POE (1UL << 63) + +/* + * HWCAP3 flags - for AT_HWCAP3 + */ #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 89d2fc872d9f..568bf858f319 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,17 +37,12 @@ #include <asm/ptrace.h> #include <asm/sve_context.h> -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -76,11 +71,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ - KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ - KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 @@ -162,6 +157,11 @@ struct kvm_sync_regs { __u64 device_irq_level; }; +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. @@ -481,6 +481,12 @@ enum { */ #define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) +/* + * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN. + */ +#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2 (1ULL << 0) + /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h index 1e6482a838e1..e7e0c8216243 100644 --- a/arch/arm64/include/uapi/asm/mman.h +++ b/arch/arm64/include/uapi/asm/mman.h @@ -7,4 +7,13 @@ #define PROT_BTI 0x10 /* BTI guarded page */ #define PROT_MTE 0x20 /* Normal Tagged mapping */ +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#define PKEY_DISABLE_READ 0x8 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_READ |\ + PKEY_DISABLE_EXECUTE) + #endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7fa2f7036aa7..0f39ba4f3efd 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -324,6 +324,14 @@ struct user_za_header { #define ZA_PT_SIZE(vq) \ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) +/* GCS state (NT_ARM_GCS) */ + +struct user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index f23c1dc3f002..d42f7a92238b 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -98,6 +98,13 @@ struct esr_context { __u64 esr; }; +#define POE_MAGIC 0x504f4530 + +struct poe_context { + struct _aarch64_ctx head; + __u64 por_el0; +}; + /* * extra_context: describes extra space in the signal frame for * additional structures that don't fit in sigcontext.__reserved[]. @@ -152,6 +159,14 @@ struct tpidr2_context { __u64 tpidr2; }; +/* FPMR context */ +#define FPMR_MAGIC 0x46504d52 + +struct fpmr_context { + struct _aarch64_ctx head; + __u64 fpmr; +}; + #define ZA_MAGIC 0x54366345 struct za_context { @@ -168,6 +183,15 @@ struct zt_context { __u16 __reserved[3]; }; +#define GCS_MAGIC 0x47435300 + +struct gcs_context { + struct _aarch64_ctx head; + __u64 gcspr; + __u64 features_enabled; + __u64 reserved; +}; + #endif /* !__ASSEMBLY__ */ #include <asm/sve_context.h> @@ -312,10 +336,10 @@ struct zt_context { ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) +#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES)) #define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \ - (SVE_SIG_ZREG_SIZE(vq) * n)) + (SVE_SIG_ZREG_SIZE(vq) * (n))) #define ZA_SIG_CONTEXT_SIZE(vq) \ (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) @@ -326,7 +350,7 @@ struct zt_context { #define ZT_SIG_REGS_OFFSET sizeof(struct zt_context) -#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n) +#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n)) #define ZT_SIG_CONTEXT_SIZE(n) \ (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n)) diff --git a/arch/arm64/include/uapi/asm/sve_context.h b/arch/arm64/include/uapi/asm/sve_context.h index 754ab751b523..72aefc081061 100644 --- a/arch/arm64/include/uapi/asm/sve_context.h +++ b/arch/arm64/include/uapi/asm/sve_context.h @@ -13,6 +13,17 @@ #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ +/* + * Yes, __SVE_VQ_MAX is 512 QUADWORDS. + * + * To help ensure forward portability, this is much larger than the + * current maximum value defined by the SVE architecture. While arrays + * or static allocations can be sized based on this value, watch out! + * It will waste a surprisingly large amount of memory. + * + * Dynamic sizing based on the actual runtime vector length is likely to + * be preferable for most purposes. + */ #define __SVE_VQ_MIN 1 #define __SVE_VQ_MAX 512 diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h index ce2ee8f1e361..df36f23876e8 100644 --- a/arch/arm64/include/uapi/asm/unistd.h +++ b/arch/arm64/include/uapi/asm/unistd.h @@ -1,25 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_SYS_CLONE3 -#define __ARCH_WANT_MEMFD_SECRET - -#include <asm-generic/unistd.h> +#include <asm/unistd_64.h> |