diff options
Diffstat (limited to 'arch/arm64')
50 files changed, 754 insertions, 551 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..d340b28eb2fb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -192,6 +192,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE @@ -1135,7 +1136,7 @@ config CRASH_DUMP config TRANS_TABLE def_bool y - depends on HIBERNATION + depends on HIBERNATION || KEXEC_CORE config XEN_DOM0 def_bool y diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 88d20f04c64a..af1fafbe7e1d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -32,7 +32,7 @@ ({ \ const struct arch_timer_erratum_workaround *__wa; \ __wa = __this_cpu_read(timer_unstable_counter_workaround); \ - (__wa && __wa->h) ? __wa->h : arch_timer_##h; \ + (__wa && __wa->h) ? ({ isb(); __wa->h;}) : arch_timer_##h; \ }) #else @@ -52,8 +52,6 @@ struct arch_timer_erratum_workaround { enum arch_timer_erratum_match_type match_type; const void *id; const char *desc; - u32 (*read_cntp_tval_el0)(void); - u32 (*read_cntv_tval_el0)(void); u64 (*read_cntpct_el0)(void); u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); @@ -64,25 +62,28 @@ struct arch_timer_erratum_workaround { DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); -/* inline sysreg accessors that make erratum_handler() work */ -static inline notrace u32 arch_timer_read_cntp_tval_el0(void) +static inline notrace u64 arch_timer_read_cntpct_el0(void) { - return read_sysreg(cntp_tval_el0); -} + u64 cnt; -static inline notrace u32 arch_timer_read_cntv_tval_el0(void) -{ - return read_sysreg(cntv_tval_el0); -} + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); -static inline notrace u64 arch_timer_read_cntpct_el0(void) -{ - return read_sysreg(cntpct_el0); + return cnt; } static inline notrace u64 arch_timer_read_cntvct_el0(void) { - return read_sysreg(cntvct_el0); + u64 cnt; + + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); + + return cnt; } #define arch_timer_reg_read_stable(reg) \ @@ -102,51 +103,58 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void) * the code. */ static __always_inline -void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u64 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: write_sysreg(val, cntp_ctl_el0); + isb(); break; - case ARCH_TIMER_REG_TVAL: - write_sysreg(val, cntp_tval_el0); + case ARCH_TIMER_REG_CVAL: + write_sysreg(val, cntp_cval_el0); break; + default: + BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: write_sysreg(val, cntv_ctl_el0); + isb(); break; - case ARCH_TIMER_REG_TVAL: - write_sysreg(val, cntv_tval_el0); + case ARCH_TIMER_REG_CVAL: + write_sysreg(val, cntv_cval_el0); break; + default: + BUILD_BUG(); } + } else { + BUILD_BUG(); } - - isb(); } static __always_inline -u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) +u64 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: return read_sysreg(cntp_ctl_el0); - case ARCH_TIMER_REG_TVAL: - return arch_timer_reg_read_stable(cntp_tval_el0); + default: + BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: return read_sysreg(cntv_ctl_el0); - case ARCH_TIMER_REG_TVAL: - return arch_timer_reg_read_stable(cntv_tval_el0); + default: + BUILD_BUG(); } } - BUG(); + BUILD_BUG(); + unreachable(); } static inline u32 arch_timer_get_cntfrq(void) @@ -169,7 +177,6 @@ static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; - isb(); cnt = arch_timer_reg_read_stable(cntpct_el0); arch_counter_enforce_ordering(cnt); return cnt; @@ -179,8 +186,10 @@ static __always_inline u64 __arch_counter_get_cntpct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntpct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } @@ -189,7 +198,6 @@ static __always_inline u64 __arch_counter_get_cntvct_stable(void) { u64 cnt; - isb(); cnt = arch_timer_reg_read_stable(cntvct_el0); arch_counter_enforce_ordering(cnt); return cnt; @@ -199,8 +207,10 @@ static __always_inline u64 __arch_counter_get_cntvct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntvct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h new file mode 100644 index 000000000000..c39f2437e08e --- /dev/null +++ b/arch/arm64/include/asm/asm-extable.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + .pushsection __ex_table, "a"; \ + .align 2; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ + .short (type); \ + .short (data); \ + .popsection; + +/* + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. + */ + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .endm + +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + +#else /* __ASSEMBLY__ */ + +#include <linux/bits.h> +#include <linux/stringify.h> + +#include <asm/gpr-num.h> + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + ".pushsection __ex_table, \"a\"\n" \ + ".align 2\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") + +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \ + "(" \ + EX_DATA_REG(DATA, data) " | " \ + EX_DATA_REG(ADDR, addr) \ + ")") + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index ccedf548dac9..0557af834e03 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -3,10 +3,11 @@ #define __ASM_ASM_UACCESS_H #include <asm/alternative-macros.h> +#include <asm/asm-extable.h> +#include <asm/assembler.h> #include <asm/kernel-pgtable.h> #include <asm/mmu.h> #include <asm/sysreg.h> -#include <asm/assembler.h> /* * User access enabling/disabling macros. @@ -58,6 +59,10 @@ alternative_else_nop_endif .endm #endif +#define USER(l, x...) \ +9999: x; \ + _asm_extable 9999b, l + /* * Generate the assembly for LDTR/STTR with exception table entries. * This is complicated as there is no post-increment or pair versions of the diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bfa58409a4d4..136d13f3d6e9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -14,9 +14,10 @@ #include <asm-generic/export.h> -#include <asm/asm-offsets.h> #include <asm/alternative.h> #include <asm/asm-bug.h> +#include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/cpufeature.h> #include <asm/cputype.h> #include <asm/debug-monitors.h> @@ -130,32 +131,6 @@ alternative_endif .endm /* - * Create an exception table entry for `insn`, which will branch to `fixup` - * when an unhandled fault is taken. - */ - .macro _asm_extable, insn, fixup - .pushsection __ex_table, "a" - .align 3 - .long (\insn - .), (\fixup - .) - .popsection - .endm - -/* - * Create an exception table entry for `insn` if `fixup` is provided. Otherwise - * do nothing. - */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup - .endif - .endm - - -#define USER(l, x...) \ -9999: x; \ - _asm_extable 9999b, l - -/* * Register aliases. */ lr .req x30 // link register @@ -405,19 +380,19 @@ alternative_endif /* * Macro to perform a data cache maintenance for the interval - * [start, end) + * [start, end) with dcache line size explicitly provided. * * op: operation passed to dc instruction * domain: domain used in dsb instruciton * start: starting virtual address of the region * end: end virtual address of the region + * linesz: dcache line size * fixup: optional label to branch to on user fault - * Corrupts: start, end, tmp1, tmp2 + * Corrupts: start, end, tmp */ - .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup - dcache_line_size \tmp1, \tmp2 - sub \tmp2, \tmp1, #1 - bic \start, \start, \tmp2 + .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + sub \tmp, \linesz, #1 + bic \start, \start, \tmp .Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \start @@ -436,7 +411,7 @@ alternative_endif .endif .endif .endif - add \start, \start, \tmp1 + add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ dsb \domain @@ -445,6 +420,22 @@ alternative_endif .endm /* + * Macro to perform a data cache maintenance for the interval + * [start, end) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * start: starting virtual address of the region + * end: end virtual address of the region + * fixup: optional label to branch to on user fault + * Corrupts: start, end, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup + dcache_line_size \tmp1, \tmp2 + dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup + .endm + +/* * Macro to perform an instruction cache maintenance for the interval * [start, end) * @@ -468,6 +459,25 @@ alternative_endif .endm /* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ + .macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 + phys_to_ttbr \tmp, \zero_page + msr ttbr1_el1, \tmp + isb + tlbi vmalle1 + dsb nsh + phys_to_ttbr \tmp, \page_table + offset_ttbr1 \tmp, \tmp2 + msr ttbr1_el1, \tmp + isb + .endm + +/* * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present */ .macro reset_pmuserenr_el0, tmpreg diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 29f97eb3dad4..a305ce256090 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -227,6 +227,9 @@ #define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) @@ -317,6 +320,9 @@ #define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ ESR_ELx_CP15_64_ISS_DIR_READ) +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index b15eb4a3e6b2..8b300dd28def 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -18,10 +18,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + static inline bool in_bpf_jit(struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_BPF_JIT)) @@ -32,16 +43,16 @@ static inline bool in_bpf_jit(struct pt_regs *regs) } #ifdef CONFIG_BPF_JIT -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs); +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs); #else /* !CONFIG_BPF_JIT */ static inline -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { - return 0; + return false; } #endif /* !CONFIG_BPF_JIT */ -extern int fixup_exception(struct pt_regs *regs); +bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 91fa4baa1a93..347b0cc68f07 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -15,7 +15,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount)) #endif /* The BL at the callsite's adjusted rec->ip */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 8e41faa37c69..bc06691d2062 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -25,19 +25,14 @@ do { \ " cbz %w0, 3f\n" \ " sub %w4, %w4, %w0\n" \ " cbnz %w4, 1b\n" \ -" mov %w0, %w7\n" \ +" mov %w0, %w6\n" \ "3:\n" \ " dmb ish\n" \ -" .pushsection .fixup,\"ax\"\n" \ -" .align 2\n" \ -"4: mov %w0, %w6\n" \ -" b 3b\n" \ -" .popsection\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ "+r" (loops) \ - : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ + : "r" (oparg), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable_privileged(); \ } while (0) @@ -105,18 +100,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, " cbz %w3, 3f\n" " sub %w4, %w4, %w3\n" " cbnz %w4, 1b\n" -" mov %w0, %w8\n" +" mov %w0, %w7\n" "3:\n" " dmb ish\n" "4:\n" -" .pushsection .fixup,\"ax\"\n" -"5: mov %w0, %w7\n" -" b 4b\n" -" .popsection\n" - _ASM_EXTABLE(1b, 5b) - _ASM_EXTABLE(2b, 5b) + _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0) + _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) + : "r" (oldval), "r" (newval), "Ir" (-EAGAIN) : "memory"); uaccess_disable_privileged(); diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h new file mode 100644 index 000000000000..05da4a7c5788 --- /dev/null +++ b/arch/arm64/include/asm/gpr-num.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__gpr_num_x\num, \num + .equ .L__gpr_num_w\num, \num + .endr + .equ .L__gpr_num_xzr, 31 + .equ .L__gpr_num_wzr, 31 + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .equ .L__gpr_num_w\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__gpr_num_xzr, 31\n" \ +" .equ .L__gpr_num_wzr, 31\n" + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8c129db8232a..b100e0055eab 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -105,6 +105,7 @@ #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) #define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) +#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 00dbcc71aeb2..9839bfc163d7 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -90,12 +90,24 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#if defined(CONFIG_KEXEC_CORE) +void cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, + unsigned long arg2); +#endif + #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { void *dtb; phys_addr_t dtb_mem; phys_addr_t kern_reloc; + phys_addr_t el2_vectors; + phys_addr_t ttbr0; + phys_addr_t ttbr1; + phys_addr_t zero_page; + unsigned long phys_offset; + unsigned long t0sz; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..6486b1db268e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -263,9 +263,10 @@ extern u64 __kvm_get_mdcr_el2(void); /* * KVM extable for unexpected exceptions. - * In the same format _asm_extable, but output to a different section so that - * it can be mapped to EL2. The KVM version is not sorted. The caller must - * ensure: + * Create a struct kvm_exception_table_entry output to a section that can be + * mapped by EL2. The table is not sorted. + * + * The caller must ensure: * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f4ba93d4ffeb..6770667b34a3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -116,6 +116,30 @@ static inline void cpu_install_idmap(void) } /* + * Load our new page tables. A strict BBM approach requires that we ensure that + * TLBs are free of any entries that may overlap with the global mappings we are + * about to install. + * + * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero + * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime + * services), while for a userspace-driven test_resume cycle it points to + * userspace page tables (and we must point it at a zero page ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). + */ +static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + __cpu_set_tcr_t0sz(t0sz); + + /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */ + write_sysreg(ttbr0, ttbr0_el1); + isb(); +} + +/* * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dfa76afa0ccf..84fbb52b4224 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1022,6 +1022,11 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot) return PAGE_READONLY_EXEC; } +static inline bool pud_sect_supported(void) +{ + return PAGE_SIZE == SZ_4K; +} + #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index e4ad9db53af1..152cb35bf9df 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -21,5 +21,6 @@ extern char __exittext_begin[], __exittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index d3320618ed14..6437df661700 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -8,4 +8,10 @@ void *get_early_fdt_ptr(void); void early_fdt_map(u64 dt_phys); +/* + * These two variables are used in the head.S file. + */ +extern phys_addr_t __fdt_pointer __initdata; +extern u64 __cacheline_aligned boot_args[4]; + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f51d5912b41c..027dbe004df4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -13,6 +13,8 @@ #include <linux/stringify.h> #include <linux/kasan-tags.h> +#include <asm/gpr-num.h> + /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -507,6 +509,9 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) +#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) + #define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) @@ -1195,17 +1200,12 @@ #ifdef __ASSEMBLY__ - .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ .L__reg_num_x\num, \num - .endr - .equ .L__reg_num_xzr, 31 - .macro mrs_s, rt, sreg - __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) .endm .macro msr_s, sreg, rt - __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) .endm #else @@ -1214,22 +1214,16 @@ #include <linux/types.h> #include <asm/alternative.h> -#define __DEFINE_MRS_MSR_S_REGNUM \ -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ -" .equ .L__reg_num_x\\num, \\num\n" \ -" .endr\n" \ -" .equ .L__reg_num_xzr, 31\n" - #define DEFINE_MRS_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro mrs_s, rt, sreg\n" \ - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define DEFINE_MSR_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define UNDEFINE_MRS_S \ diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 5d08e5adf3d5..033d400a4ea4 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin <pasha.tatashin@soleen.com> + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> */ #ifndef _ASM_TRANS_TABLE_H @@ -15,7 +15,7 @@ /* * trans_alloc_page * - Allocator that should return exactly one zeroed page, if this - * allocator fails, trans_pgd_create_copy() and trans_pgd_map_page() + * allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page() * return -ENOMEM error. * * trans_alloc_arg @@ -30,10 +30,12 @@ struct trans_pgd_info { int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd, unsigned long start, unsigned long end); -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot); - int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, unsigned long *t0sz, void *page); +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors); + +extern char trans_pgd_stub_vectors[]; + #endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 315354047d69..6e2e0b7031ab 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,7 @@ #include <linux/kasan-checks.h> #include <linux/string.h> +#include <asm/asm-extable.h> #include <asm/cpufeature.h> #include <asm/mmu.h> #include <asm/mte.h> @@ -70,12 +71,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si #define access_ok(addr, size) __range_ok(addr, size) -#define _ASM_EXTABLE(from, to) \ - " .pushsection __ex_table, \"a\"\n" \ - " .align 3\n" \ - " .long (" #from " - .), (" #to " - .)\n" \ - " .popsection\n" - /* * User access enabling/disabling. */ @@ -260,15 +255,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup, \"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " mov %1, #0\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT)) + : "r" (addr)) #define __raw_get_mem(ldr, x, ptr, err) \ do { \ @@ -337,14 +326,9 @@ do { \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ - : "r" (x), "r" (addr), "i" (-EFAULT)) + : "r" (x), "r" (addr)) #define __raw_put_mem(str, x, ptr, err) \ do { \ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7379f35ae2c6..3c8af033a997 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -67,6 +67,8 @@ */ extern u32 __boot_cpu_mode[2]; +#define ARM64_VECTOR_TABLE_LEN SZ_2K + void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); @@ -128,6 +130,11 @@ static __always_inline bool is_protected_kvm_enabled(void) return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); } +static inline bool is_hyp_nvhe(void) +{ + return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 7a22aeea9bb5..b9185503feae 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -2,6 +2,7 @@ #define _ASM_ARM64_VMALLOC_H #include <asm/page.h> +#include <asm/pgtable.h> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP @@ -9,10 +10,9 @@ static inline bool arch_vmap_pud_supported(pgprot_t prot) { /* - * Only 4k granule supports level 1 block mappings. * SW table walks can't handle removal of intermediate entries. */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + return pud_sect_supported() && !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 2dcb104c645b..1c8e4f2490bf 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,29 +53,16 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, tmp; + unsigned long ret; __uaccess_enable_tco_async(); /* Load word from unaligned pointer addr */ asm( - "1: ldr %0, %3\n" + "1: ldr %0, %2\n" "2:\n" - " .pushsection .fixup,\"ax\"\n" - " .align 2\n" - "3: bic %1, %2, #0x7\n" - " ldr %0, [%1]\n" - " and %1, %2, #0x7\n" - " lsl %1, %1, #0x3\n" -#ifndef __AARCH64EB__ - " lsr %0, %0, %1\n" -#else - " lsl %0, %0, %1\n" -#endif - " b 2b\n" - " .popsection\n" - _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (tmp) + _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1) + : "=&r" (ret) : "r" (addr), "Q" (*(unsigned long *)addr)); __uaccess_disable_tco_async(); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b8f41aa234ee..7b23b16f21ce 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -75,5 +75,6 @@ #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_ECV (1 << 19) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 0e86e8b9cedd..6875a16b09d2 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -279,7 +279,7 @@ static void __init register_insn_emulation_sysctl(void) do { \ uaccess_enable_privileged(); \ __asm__ __volatile__( \ - " mov %w3, %w7\n" \ + " mov %w3, %w6\n" \ "0: ldxr"B" %w2, [%4]\n" \ "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ @@ -290,16 +290,10 @@ do { \ "2:\n" \ " mov %w1, %w2\n" \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: mov %w0, %w6\n" \ - " b 3b\n" \ - " .popsection" \ - _ASM_EXTABLE(0b, 4b) \ - _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(0b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ : "r" ((unsigned long)addr), "i" (-EAGAIN), \ - "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable_privileged(); \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 551427ae8cc5..2124357c2075 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include <linux/arm_sdei.h> #include <linux/sched.h> +#include <linux/kexec.h> #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> @@ -171,5 +172,15 @@ int main(void) #endif BLANK(); #endif +#ifdef CONFIG_KEXEC_CORE + DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); + DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors)); + DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page)); + DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset)); + DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1)); + DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); + DEFINE(KIMAGE_START, offsetof(struct kimage, start)); + BLANK(); +#endif return 0; } diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index d47ff63a5b66..48a8af97faa9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -16,8 +16,7 @@ .pushsection .idmap.text, "awx" /* - * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for - * cpu_soft_restart. + * cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) * * @el2_switch: Flag to indicate a switch to EL2 is needed. * @entry: Location to jump to for soft reset. @@ -29,7 +28,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -SYM_CODE_START(__cpu_soft_restart) +SYM_CODE_START(cpu_soft_restart) mov_q x12, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround /* @@ -48,6 +47,6 @@ SYM_CODE_START(__cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -SYM_CODE_END(__cpu_soft_restart) +SYM_CODE_END(cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h deleted file mode 100644 index 9a7b1262ef17..000000000000 --- a/arch/arm64/kernel/cpu-reset.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * CPU reset routines - * - * Copyright (C) 2015 Huawei Futurewei Technologies. - */ - -#ifndef _ARM64_CPU_RESET_H -#define _ARM64_CPU_RESET_H - -#include <asm/virt.h> - -void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, - unsigned long arg0, unsigned long arg1, unsigned long arg2); - -static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry, - unsigned long arg0, - unsigned long arg1, - unsigned long arg2) -{ - typeof(__cpu_soft_restart) *restart; - - unsigned long el2_switch = !is_kernel_in_hyp_mode() && - is_hyp_mode_available(); - restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); - - cpu_install_idmap(); - restart(el2_switch, entry, arg0, arg1, arg2); - unreachable(); -} - -#endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9e3e8ad75f20..e6fe81782f68 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -279,7 +279,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* @@ -1930,6 +1930,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 1, }, + { + .desc = "Enhanced Counter Virtualization", + .capability = ARM64_HAS_ECV, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", @@ -2461,6 +2471,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), #endif /* CONFIG_ARM64_MTE */ + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 87731fea5e41..6e27b759056a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -94,6 +94,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", [KERNEL_HWCAP_MTE] = "mte", + [KERNEL_HWCAP_ECV] = "ecv", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 81c0186a5e32..0e1d9c3c6a93 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -16,26 +16,6 @@ #include <asm/virt.h> /* - * To prevent the possibility of old and new partial table walks being visible - * in the tlb, switch the ttbr to a zero page when we invalidate the old - * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i - * Even switching to our copied tables will cause a changed output address at - * each stage of the walk. - */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 - phys_to_ttbr \tmp, \zero_page - msr ttbr1_el1, \tmp - isb - tlbi vmalle1 - dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb -.endm - - -/* * Resume from hibernate * * Loads temporary page tables then restores the memory image. @@ -112,56 +92,4 @@ alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE hvc #0 3: ret SYM_CODE_END(swsusp_arch_suspend_exit) - -/* - * Restore the hyp stub. - * This must be done before the hibernate page is unmapped by _cpu_resume(), - * but happens before any of the hyp-stub's code is cleaned to PoC. - * - * x24: The physical address of __hyp_stub_vectors - */ -SYM_CODE_START_LOCAL(el1_sync) - msr vbar_el2, x24 - eret -SYM_CODE_END(el1_sync) - -.macro invalid_vector label -SYM_CODE_START_LOCAL(\label) - b \label -SYM_CODE_END(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* el2 vectors - switch el2 here while we restore the memory image. */ - .align 11 -SYM_CODE_START(hibernate_el2_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -SYM_CODE_END(hibernate_el2_vectors) - .popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 46a0b4d6e251..2758f75d6809 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -49,10 +49,7 @@ extern int in_suspend; /* Do we need to reset el2? */ -#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) - -/* temporary el2 vectors in the __hibernate_exit_text section. */ -extern char hibernate_el2_vectors[]; +#define el2_reset_needed() (is_hyp_nvhe()) /* hyp-stub vectors, used to restore el2 during resume from hibernate. */ extern char __hyp_stub_vectors[]; @@ -215,26 +212,7 @@ static int create_safe_exec_page(void *src_start, size_t length, if (rc) return rc; - /* - * Load our new page tables. A strict BBM approach requires that we - * ensure that TLBs are free of any entries that may overlap with the - * global mappings we are about to install. - * - * For a real hibernate/resume cycle TTBR0 currently points to a zero - * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI - * runtime services), while for a userspace-driven test_resume cycle it - * points to userspace page tables (and we must point it at a zero page - * ourselves). - * - * We change T0SZ as part of installing the idmap. This is undone by - * cpu_uninstall_idmap() in __cpu_suspend_exit(). - */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - __cpu_set_tcr_t0sz(t0sz); - write_sysreg(trans_ttbr0, ttbr0_el1); - isb(); - + cpu_install_ttbr0(trans_ttbr0, t0sz); *phys_dst_addr = virt_to_phys(page); return 0; @@ -434,6 +412,7 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; + phys_addr_t el2_vectors; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); struct trans_pgd_info trans_info = { @@ -461,6 +440,14 @@ int swsusp_arch_resume(void) return -ENOMEM; } + if (el2_reset_needed()) { + rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors); + if (rc) { + pr_err("Failed to setup el2 vectors\n"); + return rc; + } + } + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; /* * Copy swsusp_arch_suspend_exit() to a safe page. This will generate @@ -474,25 +461,13 @@ int swsusp_arch_resume(void) } /* - * The hibernate exit text contains a set of el2 vectors, that will - * be executed at el2 with the mmu off in order to reload hyp-stub. - */ - dcache_clean_inval_poc((unsigned long)hibernate_exit, - (unsigned long)hibernate_exit + exit_size); - - /* * KASLR will cause the el2 vectors to be in a different location in * the resumed kernel. Load hibernate's temporary copy into el2. * * We can skip this step if we booted at EL1, or are running with VHE. */ - if (el2_reset_needed()) { - phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit; - el2_vectors += hibernate_el2_vectors - - __hibernate_exit_text_start; /* offset */ - + if (el2_reset_needed()) __hyp_set_vectors(el2_vectors); - } hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, resume_hdr.reenter_kernel, restore_pblist, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 213d56c14f60..1038494135c8 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -21,12 +21,8 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/page.h> - -#include "cpu-reset.h" - -/* Global variables for the arm64_relocate_new_kernel routine. */ -extern const unsigned char arm64_relocate_new_kernel[]; -extern const unsigned long arm64_relocate_new_kernel_size; +#include <asm/sections.h> +#include <asm/trans_pgd.h> /** * kexec_image_info - For debugging output. @@ -43,7 +39,9 @@ static void _kexec_image_info(const char *func, int line, pr_debug(" start: %lx\n", kimage->start); pr_debug(" head: %lx\n", kimage->head); pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem); pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); + pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors); for (i = 0; i < kimage->nr_segments; i++) { pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", @@ -60,29 +58,6 @@ void machine_kexec_cleanup(struct kimage *kimage) /* Empty routine needed to avoid build errors. */ } -int machine_kexec_post_load(struct kimage *kimage) -{ - void *reloc_code = page_to_virt(kimage->control_code_page); - - memcpy(reloc_code, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); - kimage->arch.kern_reloc = __pa(reloc_code); - kexec_image_info(kimage); - - /* - * For execution with the MMU off, reloc_code needs to be cleaned to the - * PoC and invalidated from the I-cache. - */ - dcache_clean_inval_poc((unsigned long)reloc_code, - (unsigned long)reloc_code + - arm64_relocate_new_kernel_size); - icache_inval_pou((uintptr_t)reloc_code, - (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); - - return 0; -} - /** * machine_kexec_prepare - Prepare for a kexec reboot. * @@ -101,45 +76,6 @@ int machine_kexec_prepare(struct kimage *kimage) } /** - * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. - */ -static void kexec_list_flush(struct kimage *kimage) -{ - kimage_entry_t *entry; - - for (entry = &kimage->head; ; entry++) { - unsigned int flag; - unsigned long addr; - - /* flush the list entries. */ - dcache_clean_inval_poc((unsigned long)entry, - (unsigned long)entry + - sizeof(kimage_entry_t)); - - flag = *entry & IND_FLAGS; - if (flag == IND_DONE) - break; - - addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); - - switch (flag) { - case IND_INDIRECTION: - /* Set entry point just before the new list page. */ - entry = (kimage_entry_t *)addr - 1; - break; - case IND_SOURCE: - /* flush the source pages. */ - dcache_clean_inval_poc(addr, addr + PAGE_SIZE); - break; - case IND_DESTINATION: - break; - default: - BUG(); - } - } -} - -/** * kexec_segment_flush - Helper to flush the kimage segments to PoC. */ static void kexec_segment_flush(const struct kimage *kimage) @@ -163,6 +99,75 @@ static void kexec_segment_flush(const struct kimage *kimage) } } +/* Allocates pages for kexec page table */ +static void *kexec_page_alloc(void *arg) +{ + struct kimage *kimage = (struct kimage *)arg; + struct page *page = kimage_alloc_control_pages(kimage, 0); + + if (!page) + return NULL; + + memset(page_address(page), 0, PAGE_SIZE); + + return page_address(page); +} + +int machine_kexec_post_load(struct kimage *kimage) +{ + int rc; + pgd_t *trans_pgd; + void *reloc_code = page_to_virt(kimage->control_code_page); + long reloc_size; + struct trans_pgd_info info = { + .trans_alloc_page = kexec_page_alloc, + .trans_alloc_arg = kimage, + }; + + /* If in place, relocation is not used, only flush next kernel */ + if (kimage->head & IND_DONE) { + kexec_segment_flush(kimage); + kexec_image_info(kimage); + return 0; + } + + kimage->arch.el2_vectors = 0; + if (is_hyp_nvhe()) { + rc = trans_pgd_copy_el2_vectors(&info, + &kimage->arch.el2_vectors); + if (rc) + return rc; + } + + /* Create a copy of the linear map */ + trans_pgd = kexec_page_alloc(kimage); + if (!trans_pgd) + return -ENOMEM; + rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); + if (rc) + return rc; + kimage->arch.ttbr1 = __pa(trans_pgd); + kimage->arch.zero_page = __pa(empty_zero_page); + + reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; + memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); + kimage->arch.kern_reloc = __pa(reloc_code); + rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, + &kimage->arch.t0sz, reloc_code); + if (rc) + return rc; + kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage; + + /* Flush the reloc_code in preparation for its execution. */ + dcache_clean_inval_poc((unsigned long)reloc_code, + (unsigned long)reloc_code + reloc_size); + icache_inval_pou((uintptr_t)reloc_code, + (uintptr_t)reloc_code + reloc_size); + kexec_image_info(kimage); + + return 0; +} + /** * machine_kexec - Do the kexec reboot. * @@ -180,31 +185,35 @@ void machine_kexec(struct kimage *kimage) WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); - /* Flush the kimage list and its buffers. */ - kexec_list_flush(kimage); - - /* Flush the new image if already in place. */ - if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) - kexec_segment_flush(kimage); - pr_info("Bye!\n"); local_daif_mask(); /* - * cpu_soft_restart will shutdown the MMU, disable data caches, then - * transfer control to the kern_reloc which contains a copy of - * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel - * uses physical addressing to relocate the new image to its final - * position and transfers control to the image entry point when the - * relocation is complete. + * Both restart and kernel_reloc will shutdown the MMU, disable data + * caches. However, restart will start new kernel or purgatory directly, + * kernel_reloc contains the body of arm64_relocate_new_kernel * In kexec case, kimage->start points to purgatory assuming that * kernel entry and dtb address are embedded in purgatory by * userspace (kexec-tools). * In kexec_file case, the kernel starts directly without purgatory. */ - cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start, - kimage->arch.dtb_mem); + if (kimage->head & IND_DONE) { + typeof(cpu_soft_restart) *restart; + + cpu_install_idmap(); + restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); + restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, + 0, 0); + } else { + void (*kernel_reloc)(struct kimage *kimage); + + if (is_hyp_nvhe()) + __hyp_set_vectors(kimage->arch.el2_vectors); + cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); + kernel_reloc = (void *)kimage->arch.kern_reloc; + kernel_reloc(kimage); + } BUG(); /* Should never get here. */ } @@ -261,8 +270,6 @@ void arch_kexec_protect_crashkres(void) { int i; - kexec_segment_flush(kexec_crash_image); - for (i = 0; i < kexec_crash_image->nr_segments; i++) set_memory_valid( __phys_to_virt(kexec_crash_image->segment[i].mem), diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index b78ea5de97a4..f0a3df9e18a3 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -4,6 +4,8 @@ * * Copyright (C) Linaro. * Copyright (C) Huawei Futurewei Technologies. + * Copyright (C) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> */ #include <linux/kexec.h> @@ -13,7 +15,16 @@ #include <asm/kexec.h> #include <asm/page.h> #include <asm/sysreg.h> +#include <asm/virt.h> +.macro turn_off_mmu tmp1, tmp2 + mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el1, \tmp1 + isb +.endm + +.section ".kexec_relocate.text", "ax" /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. * @@ -27,33 +38,24 @@ */ SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ - mov x18, x2 /* x18 = dtb address */ - mov x17, x1 /* x17 = kimage_start */ - mov x16, x0 /* x16 = kimage_head */ - mov x14, xzr /* x14 = entry ptr */ - mov x13, xzr /* x13 = copy dest */ - /* Check if the new image needs relocation. */ - tbnz x16, IND_DONE_BIT, .Ldone + ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */ + ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */ + ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */ + ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */ raw_dcache_line_size x15, x1 /* x15 = dcache line size */ + break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ - + sub x12, x12, x22 /* Convert x12 to virt */ /* Test the entry flags. */ .Ltest_source: tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x2, x13 - add x20, x2, #PAGE_SIZE - sub x1, x15, #1 - bic x2, x2, x1 -2: dc ivac, x2 - add x2, x2, x15 - cmp x2, x20 - b.lo 2b - dsb sy - + mov x19, x13 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 + add x1, x19, #PAGE_SIZE + dcache_by_myline_op civac, sy, x19, x1, x15, x20 b .Lnext .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination @@ -65,31 +67,26 @@ SYM_CODE_START(arm64_relocate_new_kernel) .Lnext: ldr x16, [x14], #8 /* entry = *ptr++ */ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */ -.Ldone: /* wait for writes from copy_page to finish */ dsb nsh ic iallu dsb nsh isb + ldr x4, [x0, #KIMAGE_START] /* relocation start */ + ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ + ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ + turn_off_mmu x12, x13 /* Start new image. */ - mov x0, x18 - mov x1, xzr + cbz x1, .Lel1 + mov x1, x4 /* relocation start */ + mov x2, x0 /* dtb address */ + mov x3, xzr + mov x4, xzr + mov x0, #HVC_SOFT_RESTART + hvc #0 /* Jumps from el2 */ +.Lel1: mov x2, xzr mov x3, xzr - br x17 - + br x4 /* Jumps from el1 */ SYM_CODE_END(arm64_relocate_new_kernel) - -.align 3 /* To keep the 64-bit values below naturally aligned. */ - -.Lcopy_end: -.org KEXEC_CONTROL_PAGE_SIZE - -/* - * arm64_relocate_new_kernel_size - Number of bytes to copy to the - * control_code_page. - */ -.globl arm64_relocate_new_kernel_size -arm64_relocate_new_kernel_size: - .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 47f77d1234cb..d20620a1c51a 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) * dropped to EL1 because we don't support VHE, then we can't support * SDEI. */ - if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + if (is_hyp_nvhe()) { pr_err("Not supported on this hardware/boot configuration\n"); goto out_err; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b03e383d944a..7b21213a570f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -400,11 +400,11 @@ static int call_undef_hook(struct pt_regs *regs) unsigned long flags; u32 instr; int (*fn)(struct pt_regs *regs, u32 instr) = NULL; - void __user *pc = (void __user *)instruction_pointer(regs); + unsigned long pc = instruction_pointer(regs); if (!user_mode(regs)) { __le32 instr_le; - if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) + if (get_kernel_nofault(instr_le, (__le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { @@ -527,14 +527,9 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); "1: " insn ", %1\n" \ " mov %w0, #0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %w2\n" \ - " b 2b\n" \ - " .popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "=r" (res) \ - : "r" (address), "i" (-EFAULT)); \ + : "r" (address)); \ uaccess_ttbr0_disable(); \ } @@ -654,6 +649,12 @@ static const struct sys64_hook sys64_hooks[] = { .handler = cntvct_read_handler, }, { + /* Trap read access to CNTVCTSS_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS, + .handler = cntvct_read_handler, + }, + { /* Trap read access to CNTFRQ_EL0 */ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, @@ -729,6 +730,11 @@ static const struct sys64_hook cp15_64_hooks[] = { .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, .handler = compat_cntvct_read_handler, }, + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS, + .handler = compat_cntvct_read_handler, + }, {}, }; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f6b1a88245db..50bab186c49b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,12 +57,13 @@ #define SBSS_ALIGN 0 #endif -#define RO_EXCEPTION_TABLE_ALIGN 8 +#define RO_EXCEPTION_TABLE_ALIGN 4 #define RUNTIME_DISCARD_EXIT #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> #include <asm/kernel-pgtable.h> +#include <asm/kexec.h> #include <asm/memory.h> #include <asm/page.h> @@ -100,6 +101,16 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_KEXEC_CORE +#define KEXEC_TEXT \ + . = ALIGN(SZ_4K); \ + __relocate_new_kernel_start = .; \ + *(.kexec_relocate.text) \ + __relocate_new_kernel_end = .; +#else +#define KEXEC_TEXT +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ @@ -160,8 +171,8 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + KEXEC_TEXT TRAMP_TEXT - *(.fixup) *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ @@ -348,3 +359,10 @@ ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, "TRAMP_SWAPPER_OFFSET is wrong!") #endif + +#ifdef CONFIG_KEXEC_CORE +/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */ +ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1)) + <= SZ_4K, "kexec relocation code is too big or misaligned") +ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken") +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0e78a6027be..d5a47b93ef9b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -30,8 +30,12 @@ #include <asm/processor.h> #include <asm/thread_info.h> -extern struct exception_table_entry __start___kvm_ex_table; -extern struct exception_table_entry __stop___kvm_ex_table; +struct kvm_exception_table_entry { + int insn, fixup; +}; + +extern struct kvm_exception_table_entry __start___kvm_ex_table; +extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs were dirtied while in the host-side run loop: */ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) @@ -510,7 +514,7 @@ static inline void __kvm_unexpected_el2_exception(void) { extern char __guest_exit_panic[]; unsigned long addr, fixup; - struct exception_table_entry *entry, *end; + struct kvm_exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); entry = &__start___kvm_ex_table; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a7efb2ad2a1c..a5a5f5b97b17 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -4,7 +4,7 @@ */ #include <linux/linkage.h> -#include <asm/assembler.h> +#include <asm/asm-uaccess.h> .text @@ -45,13 +45,11 @@ USER(9f, sttrh wzr, [x0]) USER(7f, sttrb wzr, [x2, #-1]) 5: mov x0, #0 ret -SYM_FUNC_END(__arch_clear_user) -EXPORT_SYMBOL(__arch_clear_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 7: sub x0, x2, #5 // Adjust for faulting on the final byte... 8: add x0, x0, #4 // ...or the second word of the 4-7 byte case 9: sub x0, x2, x0 ret - .previous +SYM_FUNC_END(__arch_clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 2cf999e41d30..34e317907524 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -60,11 +60,8 @@ SYM_FUNC_START(__arch_copy_from_user) #include "copy_template.S" mov x0, #0 // Nothing to copy ret -SYM_FUNC_END(__arch_copy_from_user) -EXPORT_SYMBOL(__arch_copy_from_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, ldtrb tmp1w, [srcin]) strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_from_user) +EXPORT_SYMBOL(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 9f380eecf653..802231772608 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -59,11 +59,8 @@ SYM_FUNC_START(__arch_copy_to_user) #include "copy_template.S" mov x0, #0 ret -SYM_FUNC_END(__arch_copy_to_user) -EXPORT_SYMBOL(__arch_copy_to_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, sttrb tmp1w, [dst]) add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_to_user) +EXPORT_SYMBOL(__arch_copy_to_user) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index f188c9092696..ff1e800ba7a1 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o +obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ARM64_MTE) += mteswap.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index aa0060178343..c3d53811a15e 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -3,20 +3,87 @@ * Based on arch/arm/mm/extable.c */ +#include <linux/bitfield.h> #include <linux/extable.h> #include <linux/uaccess.h> -int fixup_exception(struct pt_regs *regs) +#include <asm/asm-extable.h> +#include <asm/ptrace.h> + +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + regs->pc = get_ex_fixup(ex); + return true; +} + +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + pt_regs_write_reg(regs, reg_err, -EFAULT); + pt_regs_write_reg(regs, reg_zero, 0); + + regs->pc = get_ex_fixup(ex); + return true; +} + +static bool +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + unsigned long data, addr, offset; + + addr = pt_regs_read_reg(regs, reg_addr); + + offset = addr & 0x7UL; + addr &= ~0x7UL; + + data = *(unsigned long*)addr; + +#ifndef __AARCH64EB__ + data >>= 8 * offset; +#else + data <<= 8 * offset; +#endif + + pt_regs_write_reg(regs, reg_data, data); + + regs->pc = get_ex_fixup(ex); + return true; +} + +bool fixup_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *ex; - fixup = search_exception_tables(instruction_pointer(regs)); - if (!fixup) - return 0; + ex = search_exception_tables(instruction_pointer(regs)); + if (!ex) + return false; - if (in_bpf_jit(regs)) - return arm64_bpf_fixup_exception(fixup, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: + return ex_handler_load_unaligned_zeropad(ex, regs); + } - regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; - return 1; + BUG(); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 23505fc35324..029cf5e42c4c 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,11 +40,10 @@ void __init arm64_hugetlb_cma_reserve(void) { int order; -#ifdef CONFIG_ARM64_4K_PAGES - order = PUD_SHIFT - PAGE_SHIFT; -#else - order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; -#endif + if (pud_sect_supported()) + order = PUD_SHIFT - PAGE_SHIFT; + else + order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; /* * HugeTLB CMA reservation is required for gigantic * huge pages which could not be allocated via the @@ -62,8 +61,9 @@ bool arch_hugetlb_migration_supported(struct hstate *h) size_t pagesize = huge_page_size(h); switch (pagesize) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case PMD_SIZE: case CONT_PMD_SIZE: @@ -126,8 +126,11 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = size; switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + if (pud_sect_supported()) + contig_ptes = 1; + break; #endif case PMD_SIZE: contig_ptes = 1; @@ -489,9 +492,9 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, static int __init hugetlbpage_init(void) { -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); -#endif + if (pud_sect_supported()) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); @@ -503,8 +506,9 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case CONT_PMD_SIZE: case PMD_SIZE: diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 37a81754d9b6..142125749783 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -416,8 +416,6 @@ void __init mem_init(void) else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; - set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); - /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cfd9deb347c3..fd85b51b9d50 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1499,6 +1499,11 @@ int arch_add_memory(int nid, u64 start, u64 size, if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); + else { + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + } + return ret; } diff --git a/arch/arm64/mm/trans_pgd-asm.S b/arch/arm64/mm/trans_pgd-asm.S new file mode 100644 index 000000000000..021c31573bcb --- /dev/null +++ b/arch/arm64/mm/trans_pgd-asm.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/kvm_asm.h> + +.macro invalid_vector label +SYM_CODE_START_LOCAL(\label) + .align 7 + b \label +SYM_CODE_END(\label) +.endm + +.macro el1_sync_vector +SYM_CODE_START_LOCAL(el1_sync) + .align 7 + cmp x0, #HVC_SET_VECTORS /* Called from hibernate */ + b.ne 1f + msr vbar_el2, x1 + mov x0, xzr + eret +1: cmp x0, #HVC_SOFT_RESTART /* Called from kexec */ + b.ne 2f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 +2: /* Unexpected argument, set an error */ + mov_q x0, HVC_STUB_ERR + eret +SYM_CODE_END(el1_sync) +.endm + +SYM_CODE_START(trans_pgd_stub_vectors) + invalid_vector hyp_stub_el2t_sync_invalid // Synchronous EL2t + invalid_vector hyp_stub_el2t_irq_invalid // IRQ EL2t + invalid_vector hyp_stub_el2t_fiq_invalid // FIQ EL2t + invalid_vector hyp_stub_el2t_error_invalid // Error EL2t + + invalid_vector hyp_stub_el2h_sync_invalid // Synchronous EL2h + invalid_vector hyp_stub_el2h_irq_invalid // IRQ EL2h + invalid_vector hyp_stub_el2h_fiq_invalid // FIQ EL2h + invalid_vector hyp_stub_el2h_error_invalid // Error EL2h + + el1_sync_vector // Synchronous 64-bit EL1 + invalid_vector hyp_stub_el1_irq_invalid // IRQ 64-bit EL1 + invalid_vector hyp_stub_el1_fiq_invalid // FIQ 64-bit EL1 + invalid_vector hyp_stub_el1_error_invalid // Error 64-bit EL1 + + invalid_vector hyp_stub_32b_el1_sync_invalid // Synchronous 32-bit EL1 + invalid_vector hyp_stub_32b_el1_irq_invalid // IRQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_fiq_invalid // FIQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_error_invalid // Error 32-bit EL1 + .align 11 +SYM_INNER_LABEL(__trans_pgd_stub_vectors_end, SYM_L_LOCAL) +SYM_CODE_END(trans_pgd_stub_vectors) + +# Check the trans_pgd_stub_vectors didn't overflow +.org . - (__trans_pgd_stub_vectors_end - trans_pgd_stub_vectors) + SZ_2K diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 527f0a39c3da..d7da8ca40d2e 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -5,8 +5,8 @@ * * This file derived from: arch/arm64/kernel/hibernate.c * - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin <pasha.tatashin@soleen.com> + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> * */ @@ -218,63 +218,6 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, } /* - * Add map entry to trans_pgd for a base-size page at PTE level. - * info: contains allocator and its argument - * trans_pgd: page table in which new map is added. - * page: page to be mapped. - * dst_addr: new VA address for the page - * pgprot: protection for the page. - * - * Returns 0 on success, and -ENOMEM on failure. - */ -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot) -{ - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - p4dp = trans_alloc(info); - if (!pgdp) - return -ENOMEM; - pgd_populate(NULL, pgdp, p4dp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = trans_alloc(info); - if (!pudp) - return -ENOMEM; - p4d_populate(NULL, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = trans_alloc(info); - if (!pmdp) - return -ENOMEM; - pud_populate(NULL, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = trans_alloc(info); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(NULL, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot)); - - return 0; -} - -/* * The page we want to idmap may be outside the range covered by VA_BITS that * can be built using the kernel's p?d_populate() helpers. As a one off, for a * single page, we build these page tables bottom up and just assume that will @@ -322,3 +265,26 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, return 0; } + +/* + * Create a copy of the vector table so we can call HVC_SET_VECTORS or + * HVC_SOFT_RESTART from contexts where the table may be overwritten. + */ +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors) +{ + void *hyp_stub = trans_alloc(info); + + if (!hyp_stub) + return -ENOMEM; + *el2_vectors = virt_to_phys(hyp_stub); + memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); + caches_clean_inval_pou((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + dcache_clean_inval_poc((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + + return 0; +} diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..7df7345e60d8 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include <linux/printk.h> #include <linux/slab.h> +#include <asm/asm-extable.h> #include <asm/byteorder.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> @@ -358,15 +359,15 @@ static void build_epilogue(struct jit_ctx *ctx) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); regs->regs[dst_reg] = 0; regs->pc = (unsigned long)&ex->fixup - offset; - return 1; + return true; } /* For accesses to BTF pointers, add an entry to the exception table */ @@ -412,6 +413,8 @@ static int add_exception_handler(const struct bpf_insn *insn, ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + ex->type = EX_TYPE_BPF; + ctx->exentry_idx++; return 0; } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 74a569bf52d6..e91795d1a8a3 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -18,6 +18,7 @@ HAS_CRC32 HAS_DCPODP HAS_DCPOP HAS_E0PD +HAS_ECV HAS_EPAN HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH |