diff options
Diffstat (limited to 'arch/arm64/kernel')
25 files changed, 627 insertions, 510 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a2faf0049dab..76f32e424065 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -80,7 +80,7 @@ obj-y += head.o always-$(KBUILD_BUILTIN) += vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) -AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" +AFLAGS_head.o += -DVMLINUX_PATH="\"$(abspath vmlinux)\"" endif # for cleaning diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index b9a66fc146c9..4d529ff7ba51 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -197,6 +197,8 @@ out: */ void __init acpi_boot_table_init(void) { + int ret; + /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -250,10 +252,12 @@ done: * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - acpi_parse_spcr(earlycon_acpi_spcr_enable, + ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - pr_info("Use ACPI SPCR as default console: %s\n", - param_acpi_nospcr ? "No" : "Yes"); + if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) + pr_info("Use ACPI SPCR as default console: No\n"); + else + pr_info("Use ACPI SPCR as default console: Yes\n"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e151585c6cca..a006e38b2684 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -320,6 +320,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT, 4, ID_AA64PFR2_EL1_MTESTOREONLY_NI), ARM64_FTR_END, }; @@ -2213,6 +2215,38 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry, return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE); } +static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope) +{ + /* + * We want to allow usage of BBML2 in as wide a range of kernel contexts + * as possible. This list is therefore an allow-list of known-good + * implementations that both support BBML2 and additionally, fulfill the + * extra constraint of never generating TLB conflict aborts when using + * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain + * kernel contexts difficult to prove safe against recursive aborts). + * + * Note that implementations can only be considered "known-good" if their + * implementors attest to the fact that the implementation never raises + * TLB conflict aborts for BBML2 mapping granularity changes. + */ + static const struct midr_range supports_bbml2_noabort_list[] = { + MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf), + MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf), + {} + }; + + /* Does our cpu guarantee to never raise TLB conflict aborts? */ + if (!is_midr_in_range_list(supports_bbml2_noabort_list)) + return false; + + /* + * We currently ignore the ID_AA64MMFR2_EL1 register, and only care + * about whether the MIDR check passes. + */ + + return true; +} + #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -2874,6 +2908,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3) }, + { + .desc = "FAR on MTE Tag Check Fault", + .capability = ARM64_MTE_FAR, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTEFAR, IMP) + }, + { + .desc = "Store Only MTE Tag Check", + .capability = ARM64_MTE_STORE_ONLY, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTESTOREONLY, IMP) + }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", @@ -2981,6 +3029,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, { + .desc = "BBM Level 2 without TLB conflict abort", + .capability = ARM64_HAS_BBML2_NOABORT, + .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, + .matches = has_bbml2_noabort, + }, + { .desc = "52-bit Virtual Addressing for KVM (LPA2)", .capability = ARM64_HAS_LPA2, .type = ARM64_CPUCAP_SYSTEM_FEATURE, @@ -3218,6 +3272,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_MTE HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE), HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3), + HWCAP_CAP(ID_AA64PFR2_EL1, MTEFAR, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_FAR), + HWCAP_CAP(ID_AA64PFR2_EL1, MTESTOREONLY, IMP, CAP_HWCAP , KERNEL_HWCAP_MTE_STORE_ONLY), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP), @@ -3377,18 +3433,49 @@ static void update_cpu_capabilities(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { + bool match_all = false; + bool caps_set = false; + bool boot_cpu = false; + caps = cpucap_ptrs[i]; - if (!caps || !(caps->type & scope_mask) || - cpus_have_cap(caps->capability) || - !caps->matches(caps, cpucap_default_scope(caps))) + if (!caps || !(caps->type & scope_mask)) continue; - if (caps->desc && !caps->cpus) + match_all = cpucap_match_all_early_cpus(caps); + caps_set = cpus_have_cap(caps->capability); + boot_cpu = scope_mask & SCOPE_BOOT_CPU; + + /* + * Unless it's a match-all CPUs feature, avoid probing if + * already detected. + */ + if (!match_all && caps_set) + continue; + + /* + * A match-all CPUs capability is only set when probing the + * boot CPU. It may be cleared subsequently if not detected on + * secondary ones. + */ + if (match_all && !caps_set && !boot_cpu) + continue; + + if (!caps->matches(caps, cpucap_default_scope(caps))) { + if (match_all) + __clear_bit(caps->capability, system_cpucaps); + continue; + } + + /* + * Match-all CPUs capabilities are logged later when the + * system capabilities are finalised. + */ + if (!match_all && caps->desc && !caps->cpus) pr_info("detected: %s\n", caps->desc); __set_bit(caps->capability, system_cpucaps); - if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) + if (boot_cpu && (caps->type & SCOPE_BOOT_CPU)) set_bit(caps->capability, boot_cpucaps); } } @@ -3789,17 +3876,24 @@ static void __init setup_system_capabilities(void) enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); apply_alternatives_all(); - /* - * Log any cpucaps with a cpumask as these aren't logged by - * update_cpu_capabilities(). - */ for (int i = 0; i < ARM64_NCAPS; i++) { const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i]; - if (caps && caps->cpus && caps->desc && - cpumask_any(caps->cpus) < nr_cpu_ids) + if (!caps || !caps->desc) + continue; + + /* + * Log any cpucaps with a cpumask as these aren't logged by + * update_cpu_capabilities(). + */ + if (caps->cpus && cpumask_any(caps->cpus) < nr_cpu_ids) pr_info("detected: %s on CPU%*pbl\n", caps->desc, cpumask_pr_args(caps->cpus)); + + /* Log match-all CPUs capabilities */ + if (cpucap_match_all_early_cpus(caps) && + cpus_have_cap(caps->capability)) + pr_info("detected: %s\n", caps->desc); } /* diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c1f2b6b04b41..ba834909a28b 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -160,6 +160,8 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa", [KERNEL_HWCAP_SME_STMOP] = "smestmop", [KERNEL_HWCAP_SME_SMOP4] = "smesmop4", + [KERNEL_HWCAP_MTE_FAR] = "mtefar", + [KERNEL_HWCAP_MTE_STORE_ONLY] = "mtestoreonly", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 58f047de3e1c..110d9ff54174 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -21,8 +21,12 @@ #include <asm/cputype.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> +#include <asm/exception.h> +#include <asm/kgdb.h> +#include <asm/kprobes.h> #include <asm/system_misc.h> #include <asm/traps.h> +#include <asm/uprobes.h> /* Determine debug architecture. */ u8 debug_monitors_arch(void) @@ -34,7 +38,7 @@ u8 debug_monitors_arch(void) /* * MDSCR access routines. */ -static void mdscr_write(u32 mdscr) +static void mdscr_write(u64 mdscr) { unsigned long flags; flags = local_daif_save(); @@ -43,7 +47,7 @@ static void mdscr_write(u32 mdscr) } NOKPROBE_SYMBOL(mdscr_write); -static u32 mdscr_read(void) +static u64 mdscr_read(void) { return read_sysreg(mdscr_el1); } @@ -79,16 +83,16 @@ static DEFINE_PER_CPU(int, kde_ref_count); void enable_debug_monitors(enum dbg_active_el el) { - u32 mdscr, enable = 0; + u64 mdscr, enable = 0; WARN_ON(preemptible()); if (this_cpu_inc_return(mde_ref_count) == 1) - enable = DBG_MDSCR_MDE; + enable = MDSCR_EL1_MDE; if (el == DBG_ACTIVE_EL1 && this_cpu_inc_return(kde_ref_count) == 1) - enable |= DBG_MDSCR_KDE; + enable |= MDSCR_EL1_KDE; if (enable && debug_enabled) { mdscr = mdscr_read(); @@ -100,16 +104,16 @@ NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { - u32 mdscr, disable = 0; + u64 mdscr, disable = 0; WARN_ON(preemptible()); if (this_cpu_dec_return(mde_ref_count) == 0) - disable = ~DBG_MDSCR_MDE; + disable = ~MDSCR_EL1_MDE; if (el == DBG_ACTIVE_EL1 && this_cpu_dec_return(kde_ref_count) == 0) - disable &= ~DBG_MDSCR_KDE; + disable &= ~MDSCR_EL1_KDE; if (disable) { mdscr = mdscr_read(); @@ -156,74 +160,6 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); #define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) #define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) -static DEFINE_SPINLOCK(debug_hook_lock); -static LIST_HEAD(user_step_hook); -static LIST_HEAD(kernel_step_hook); - -static void register_debug_hook(struct list_head *node, struct list_head *list) -{ - spin_lock(&debug_hook_lock); - list_add_rcu(node, list); - spin_unlock(&debug_hook_lock); - -} - -static void unregister_debug_hook(struct list_head *node) -{ - spin_lock(&debug_hook_lock); - list_del_rcu(node); - spin_unlock(&debug_hook_lock); - synchronize_rcu(); -} - -void register_user_step_hook(struct step_hook *hook) -{ - register_debug_hook(&hook->node, &user_step_hook); -} - -void unregister_user_step_hook(struct step_hook *hook) -{ - unregister_debug_hook(&hook->node); -} - -void register_kernel_step_hook(struct step_hook *hook) -{ - register_debug_hook(&hook->node, &kernel_step_hook); -} - -void unregister_kernel_step_hook(struct step_hook *hook) -{ - unregister_debug_hook(&hook->node); -} - -/* - * Call registered single step handlers - * There is no Syndrome info to check for determining the handler. - * So we call all the registered handlers, until the right handler is - * found which returns zero. - */ -static int call_step_hook(struct pt_regs *regs, unsigned long esr) -{ - struct step_hook *hook; - struct list_head *list; - int retval = DBG_HOOK_ERROR; - - list = user_mode(regs) ? &user_step_hook : &kernel_step_hook; - - /* - * Since single-step exception disables interrupt, this function is - * entirely not preemptible, and we can use rcu list safely here. - */ - list_for_each_entry_rcu(hook, list, node) { - retval = hook->fn(regs, esr); - if (retval == DBG_HOOK_HANDLED) - break; - } - - return retval; -} -NOKPROBE_SYMBOL(call_step_hook); - static void send_user_sigtrap(int si_code) { struct pt_regs *regs = current_pt_regs(); @@ -238,105 +174,110 @@ static void send_user_sigtrap(int si_code) "User debug trap"); } -static int single_step_handler(unsigned long unused, unsigned long esr, - struct pt_regs *regs) +/* + * We have already unmasked interrupts and enabled preemption + * when calling do_el0_softstep() from entry-common.c. + */ +void do_el0_softstep(unsigned long esr, struct pt_regs *regs) { - bool handler_found = false; + if (uprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return; + send_user_sigtrap(TRAP_TRACE); /* - * If we are stepping a pending breakpoint, call the hw_breakpoint - * handler first. + * ptrace will disable single step unless explicitly + * asked to re-enable it. For other clients, it makes + * sense to leave it enabled (i.e. rewind the controls + * to the active-not-pending state). */ - if (!reinstall_suspended_bps(regs)) - return 0; - - if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; - - if (!handler_found && user_mode(regs)) { - send_user_sigtrap(TRAP_TRACE); - - /* - * ptrace will disable single step unless explicitly - * asked to re-enable it. For other clients, it makes - * sense to leave it enabled (i.e. rewind the controls - * to the active-not-pending state). - */ - user_rewind_single_step(current); - } else if (!handler_found) { - pr_warn("Unexpected kernel single-step exception at EL1\n"); - /* - * Re-enable stepping since we know that we will be - * returning to regs. - */ - set_regs_spsr_ss(regs); - } - - return 0; + user_rewind_single_step(current); } -NOKPROBE_SYMBOL(single_step_handler); - -static LIST_HEAD(user_break_hook); -static LIST_HEAD(kernel_break_hook); -void register_user_break_hook(struct break_hook *hook) +void do_el1_softstep(unsigned long esr, struct pt_regs *regs) { - register_debug_hook(&hook->node, &user_break_hook); -} + if (kgdb_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return; -void unregister_user_break_hook(struct break_hook *hook) -{ - unregister_debug_hook(&hook->node); + pr_warn("Unexpected kernel single-step exception at EL1\n"); + /* + * Re-enable stepping since we know that we will be + * returning to regs. + */ + set_regs_spsr_ss(regs); } +NOKPROBE_SYMBOL(do_el1_softstep); -void register_kernel_break_hook(struct break_hook *hook) +static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr) { - register_debug_hook(&hook->node, &kernel_break_hook); -} + if (esr_brk_comment(esr) == BUG_BRK_IMM) + return bug_brk_handler(regs, esr); -void unregister_kernel_break_hook(struct break_hook *hook) -{ - unregister_debug_hook(&hook->node); -} + if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr)) + return cfi_brk_handler(regs, esr); -static int call_break_hook(struct pt_regs *regs, unsigned long esr) -{ - struct break_hook *hook; - struct list_head *list; + if (esr_brk_comment(esr) == FAULT_BRK_IMM) + return reserved_fault_brk_handler(regs, esr); - list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) && + (esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) + return kasan_brk_handler(regs, esr); - /* - * Since brk exception disables interrupt, this function is - * entirely not preemptible, and we can use rcu list safely here. - */ - list_for_each_entry_rcu(hook, list, node) { - if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm) - return hook->fn(regs, esr); + if (IS_ENABLED(CONFIG_UBSAN_TRAP) && esr_is_ubsan_brk(esr)) + return ubsan_brk_handler(regs, esr); + + if (IS_ENABLED(CONFIG_KGDB)) { + if (esr_brk_comment(esr) == KGDB_DYN_DBG_BRK_IMM) + return kgdb_brk_handler(regs, esr); + if (esr_brk_comment(esr) == KGDB_COMPILED_DBG_BRK_IMM) + return kgdb_compiled_brk_handler(regs, esr); + } + + if (IS_ENABLED(CONFIG_KPROBES)) { + if (esr_brk_comment(esr) == KPROBES_BRK_IMM) + return kprobe_brk_handler(regs, esr); + if (esr_brk_comment(esr) == KPROBES_BRK_SS_IMM) + return kprobe_ss_brk_handler(regs, esr); } + if (IS_ENABLED(CONFIG_KRETPROBES) && + esr_brk_comment(esr) == KRETPROBES_BRK_IMM) + return kretprobe_brk_handler(regs, esr); + return DBG_HOOK_ERROR; } -NOKPROBE_SYMBOL(call_break_hook); +NOKPROBE_SYMBOL(call_el1_break_hook); -static int brk_handler(unsigned long unused, unsigned long esr, - struct pt_regs *regs) +/* + * We have already unmasked interrupts and enabled preemption + * when calling do_el0_brk64() from entry-common.c. + */ +void do_el0_brk64(unsigned long esr, struct pt_regs *regs) { - if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; + if (IS_ENABLED(CONFIG_UPROBES) && + esr_brk_comment(esr) == UPROBES_BRK_IMM && + uprobe_brk_handler(regs, esr) == DBG_HOOK_HANDLED) + return; - if (user_mode(regs)) { - send_user_sigtrap(TRAP_BRKPT); - } else { - pr_warn("Unexpected kernel BRK exception at EL1\n"); - return -EFAULT; - } + send_user_sigtrap(TRAP_BRKPT); +} - return 0; +void do_el1_brk64(unsigned long esr, struct pt_regs *regs) +{ + if (call_el1_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return; + + die("Oops - BRK", regs, esr); +} +NOKPROBE_SYMBOL(do_el1_brk64); + +#ifdef CONFIG_COMPAT +void do_bkpt32(unsigned long esr, struct pt_regs *regs) +{ + arm64_notify_die("aarch32 BKPT", regs, SIGTRAP, TRAP_BRKPT, regs->pc, esr); } -NOKPROBE_SYMBOL(brk_handler); +#endif /* CONFIG_COMPAT */ -int aarch32_break_handler(struct pt_regs *regs) +bool try_handle_aarch32_break(struct pt_regs *regs) { u32 arm_instr; u16 thumb_instr; @@ -344,7 +285,7 @@ int aarch32_break_handler(struct pt_regs *regs) void __user *pc = (void __user *)instruction_pointer(regs); if (!compat_user_mode(regs)) - return -EFAULT; + return false; if (compat_thumb_mode(regs)) { /* get 16-bit Thumb instruction */ @@ -368,20 +309,12 @@ int aarch32_break_handler(struct pt_regs *regs) } if (!bp) - return -EFAULT; + return false; send_user_sigtrap(TRAP_BRKPT); - return 0; -} -NOKPROBE_SYMBOL(aarch32_break_handler); - -void __init debug_traps_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, - TRAP_TRACE, "single-step handler"); - hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, - TRAP_BRKPT, "BRK handler"); + return true; } +NOKPROBE_SYMBOL(try_handle_aarch32_break); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) @@ -415,7 +348,7 @@ void kernel_enable_single_step(struct pt_regs *regs) { WARN_ON(!irqs_disabled()); set_regs_spsr_ss(regs); - mdscr_write(mdscr_read() | DBG_MDSCR_SS); + mdscr_write(mdscr_read() | MDSCR_EL1_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } NOKPROBE_SYMBOL(kernel_enable_single_step); @@ -423,7 +356,7 @@ NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { WARN_ON(!irqs_disabled()); - mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); + mdscr_write(mdscr_read() & ~MDSCR_EL1_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } NOKPROBE_SYMBOL(kernel_disable_single_step); @@ -431,7 +364,7 @@ NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); - return mdscr_read() & DBG_MDSCR_SS; + return mdscr_read() & MDSCR_EL1_SS; } NOKPROBE_SYMBOL(kernel_active_single_step); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 62230d6dd919..6c371b158b99 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -215,11 +215,6 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return -ENOMEM; - } - p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 7c1970b341b8..2b0c5925502e 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -8,6 +8,7 @@ #include <linux/context_tracking.h> #include <linux/kasan.h> #include <linux/linkage.h> +#include <linux/livepatch.h> #include <linux/lockdep.h> #include <linux/ptrace.h> #include <linux/resume_user_mode.h> @@ -144,6 +145,9 @@ static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) (void __user *)NULL, current); } + if (thread_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); @@ -344,7 +348,7 @@ static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); static void cortex_a76_erratum_1463225_svc_handler(void) { - u32 reg, val; + u64 reg, val; if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) return; @@ -354,7 +358,7 @@ static void cortex_a76_erratum_1463225_svc_handler(void) __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); reg = read_sysreg(mdscr_el1); - val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; + val = reg | MDSCR_EL1_SS | MDSCR_EL1_KDE; write_sysreg(val, mdscr_el1); asm volatile("msr daifclr, #8"); isb(); @@ -441,6 +445,28 @@ static __always_inline void fpsimd_syscall_exit(void) __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT); } +/* + * In debug exception context, we explicitly disable preemption despite + * having interrupts disabled. + * This serves two purposes: it makes it much less likely that we would + * accidentally schedule in exception context and it will force a warning + * if we somehow manage to schedule by accident. + */ +static void debug_exception_enter(struct pt_regs *regs) +{ + preempt_disable(); + + /* This code is a bit fragile. Test it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); +} +NOKPROBE_SYMBOL(debug_exception_enter); + +static void debug_exception_exit(struct pt_regs *regs) +{ + preempt_enable_no_resched(); +} +NOKPROBE_SYMBOL(debug_exception_exit); + UNHANDLED(el1t, 64, sync) UNHANDLED(el1t, 64, irq) UNHANDLED(el1t, 64, fiq) @@ -504,13 +530,51 @@ static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } -static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr) +{ + arm64_enter_el1_dbg(regs); + debug_exception_enter(regs); + do_breakpoint(esr, regs); + debug_exception_exit(regs); + arm64_exit_el1_dbg(regs); +} + +static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr) { + arm64_enter_el1_dbg(regs); + if (!cortex_a76_erratum_1463225_debug_handler(regs)) { + debug_exception_enter(regs); + /* + * After handling a breakpoint, we suspend the breakpoint + * and use single-step to move to the next instruction. + * If we are stepping a suspended breakpoint there's nothing more to do: + * the single-step is complete. + */ + if (!try_step_suspended_breakpoints(regs)) + do_el1_softstep(esr, regs); + debug_exception_exit(regs); + } + arm64_exit_el1_dbg(regs); +} + +static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr) +{ + /* Watchpoints are the only debug exception to write FAR_EL1 */ unsigned long far = read_sysreg(far_el1); arm64_enter_el1_dbg(regs); - if (!cortex_a76_erratum_1463225_debug_handler(regs)) - do_debug_exception(far, esr, regs); + debug_exception_enter(regs); + do_watchpoint(far, esr, regs); + debug_exception_exit(regs); + arm64_exit_el1_dbg(regs); +} + +static void noinstr el1_brk64(struct pt_regs *regs, unsigned long esr) +{ + arm64_enter_el1_dbg(regs); + debug_exception_enter(regs); + do_el1_brk64(esr, regs); + debug_exception_exit(regs); arm64_exit_el1_dbg(regs); } @@ -553,10 +617,16 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) el1_mops(regs, esr); break; case ESR_ELx_EC_BREAKPT_CUR: + el1_breakpt(regs, esr); + break; case ESR_ELx_EC_SOFTSTP_CUR: + el1_softstp(regs, esr); + break; case ESR_ELx_EC_WATCHPT_CUR: + el1_watchpt(regs, esr); + break; case ESR_ELx_EC_BRK64: - el1_dbg(regs, esr); + el1_brk64(regs, esr); break; case ESR_ELx_EC_FPAC: el1_fpac(regs, esr); @@ -747,14 +817,56 @@ static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } -static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) { - /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ + if (!is_ttbr0_addr(regs->pc)) + arm64_apply_bp_hardening(); + + enter_from_user_mode(regs); + debug_exception_enter(regs); + do_breakpoint(esr, regs); + debug_exception_exit(regs); + local_daif_restore(DAIF_PROCCTX); + exit_to_user_mode(regs); +} + +static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) +{ + if (!is_ttbr0_addr(regs->pc)) + arm64_apply_bp_hardening(); + + enter_from_user_mode(regs); + /* + * After handling a breakpoint, we suspend the breakpoint + * and use single-step to move to the next instruction. + * If we are stepping a suspended breakpoint there's nothing more to do: + * the single-step is complete. + */ + if (!try_step_suspended_breakpoints(regs)) { + local_daif_restore(DAIF_PROCCTX); + do_el0_softstep(esr, regs); + } + exit_to_user_mode(regs); +} + +static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr) +{ + /* Watchpoints are the only debug exception to write FAR_EL1 */ unsigned long far = read_sysreg(far_el1); enter_from_user_mode(regs); - do_debug_exception(far, esr, regs); + debug_exception_enter(regs); + do_watchpoint(far, esr, regs); + debug_exception_exit(regs); + local_daif_restore(DAIF_PROCCTX); + exit_to_user_mode(regs); +} + +static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); + do_el0_brk64(esr, regs); exit_to_user_mode(regs); } @@ -826,10 +938,16 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) el0_gcs(regs, esr); break; case ESR_ELx_EC_BREAKPT_LOW: + el0_breakpt(regs, esr); + break; case ESR_ELx_EC_SOFTSTP_LOW: + el0_softstp(regs, esr); + break; case ESR_ELx_EC_WATCHPT_LOW: + el0_watchpt(regs, esr); + break; case ESR_ELx_EC_BRK64: - el0_dbg(regs, esr); + el0_brk64(regs, esr); break; case ESR_ELx_EC_FPAC: el0_fpac(regs, esr); @@ -912,6 +1030,14 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) exit_to_user_mode(regs); } +static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_bkpt32(esr, regs); + exit_to_user_mode(regs); +} + asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -946,10 +1072,16 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) el0_cp15(regs, esr); break; case ESR_ELx_EC_BREAKPT_LOW: + el0_breakpt(regs, esr); + break; case ESR_ELx_EC_SOFTSTP_LOW: + el0_softstp(regs, esr); + break; case ESR_ELx_EC_WATCHPT_LOW: + el0_watchpt(regs, esr); + break; case ESR_ELx_EC_BKPT32: - el0_dbg(regs, esr); + el0_bkpt32(regs, esr); break; default: el0_inv(regs, esr); @@ -977,7 +1109,6 @@ UNHANDLED(el0t, 32, fiq) UNHANDLED(el0t, 32, error) #endif /* CONFIG_COMPAT */ -#ifdef CONFIG_VMAP_STACK asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -986,7 +1117,6 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs) arm64_enter_nmi(regs); panic_bad_stack(regs, esr, far); } -#endif /* CONFIG_VMAP_STACK */ #ifdef CONFIG_ARM_SDE_INTERFACE asmlinkage noinstr unsigned long diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5ae2a34b50bd..f8018b5c1f9a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -55,7 +55,6 @@ .endif sub sp, sp, #PT_REGS_SIZE -#ifdef CONFIG_VMAP_STACK /* * Test whether the SP has overflowed, without corrupting a GPR. * Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT) @@ -97,7 +96,6 @@ /* We were already on the overflow stack. Restore sp/x0 and carry on. */ sub sp, sp, x0 mrs x0, tpidrro_el0 -#endif b el\el\ht\()_\regsize\()_\label .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm @@ -540,7 +538,6 @@ SYM_CODE_START(vectors) kernel_ventry 0, t, 32, error // Error 32-bit EL0 SYM_CODE_END(vectors) -#ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(__bad_stack) /* * We detected an overflow in kernel_ventry, which switched to the @@ -568,7 +565,6 @@ SYM_CODE_START_LOCAL(__bad_stack) bl handle_bad_stack ASM_BUG() SYM_CODE_END(__bad_stack) -#endif /* CONFIG_VMAP_STACK */ .macro entry_handler el:req, ht:req, regsize:req, label:req @@ -614,7 +610,7 @@ SYM_CODE_END(ret_to_kernel) SYM_CODE_START_LOCAL(ret_to_user) ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step enable_step_tsk x19, x2 -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE bl stackleak_erase_on_task_stack #endif kernel_exit 0 @@ -825,6 +821,7 @@ SYM_CODE_END(__bp_harden_el1_vectors) * */ SYM_FUNC_START(cpu_switch_to) + save_and_disable_daif x11 mov x10, #THREAD_CPU_CONTEXT add x8, x0, x10 mov x9, sp @@ -848,6 +845,7 @@ SYM_FUNC_START(cpu_switch_to) ptrauth_keys_install_kernel x1, x8, x9, x10 scs_save x0 scs_load_current + restore_irq x11 ret SYM_FUNC_END(cpu_switch_to) NOKPROBE(cpu_switch_to) @@ -874,6 +872,7 @@ NOKPROBE(ret_from_fork) * Calls func(regs) using this CPU's irq stack and shadow irq stack. */ SYM_FUNC_START(call_on_irq_stack) + save_and_disable_daif x9 #ifdef CONFIG_SHADOW_CALL_STACK get_current_task x16 scs_save x16 @@ -888,8 +887,10 @@ SYM_FUNC_START(call_on_irq_stack) /* Move to the new stack and call the function there */ add sp, x16, #IRQ_STACK_SIZE + restore_irq x9 blr x1 + save_and_disable_daif x9 /* * Restore the SP from the FP, and restore the FP and LR from the frame * record. @@ -897,6 +898,7 @@ SYM_FUNC_START(call_on_irq_stack) mov sp, x29 ldp x29, x30, [sp], #16 scs_load_current + restore_irq x9 ret SYM_FUNC_END(call_on_irq_stack) NOKPROBE(call_on_irq_stack) @@ -1003,7 +1005,6 @@ SYM_CODE_START(__sdei_asm_handler) 1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 2: str x19, [x5] -#ifdef CONFIG_VMAP_STACK /* * entry.S may have been using sp as a scratch register, find whether * this is a normal or critical event and switch to the appropriate @@ -1016,7 +1017,6 @@ SYM_CODE_START(__sdei_asm_handler) 2: mov x6, #SDEI_STACK_SIZE add x5, x5, x6 mov sp, x5 -#endif #ifdef CONFIG_SHADOW_CALL_STACK /* Use a separate shadow call stack for normal and critical events */ diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 722ac45f9f7b..ab76b36dce82 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -22,6 +22,7 @@ #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/exception.h> #include <asm/hw_breakpoint.h> #include <asm/traps.h> #include <asm/cputype.h> @@ -618,8 +619,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. */ -static int breakpoint_handler(unsigned long unused, unsigned long esr, - struct pt_regs *regs) +void do_breakpoint(unsigned long esr, struct pt_regs *regs) { int i, step = 0, *kernel_step; u32 ctrl_reg; @@ -662,7 +662,7 @@ unlock: } if (!step) - return 0; + return; if (user_mode(regs)) { debug_info->bps_disabled = 1; @@ -670,7 +670,7 @@ unlock: /* If we're already stepping a watchpoint, just return. */ if (debug_info->wps_disabled) - return 0; + return; if (test_thread_flag(TIF_SINGLESTEP)) debug_info->suspended_step = 1; @@ -681,7 +681,7 @@ unlock: kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) - return 0; + return; if (kernel_active_single_step()) { *kernel_step = ARM_KERNEL_STEP_SUSPEND; @@ -690,10 +690,8 @@ unlock: kernel_enable_single_step(regs); } } - - return 0; } -NOKPROBE_SYMBOL(breakpoint_handler); +NOKPROBE_SYMBOL(do_breakpoint); /* * Arm64 hardware does not always report a watchpoint hit address that matches @@ -752,8 +750,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr, return step; } -static int watchpoint_handler(unsigned long addr, unsigned long esr, - struct pt_regs *regs) +void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs) { int i, step = 0, *kernel_step, access, closest_match = 0; u64 min_dist = -1, dist; @@ -808,7 +805,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, rcu_read_unlock(); if (!step) - return 0; + return; /* * We always disable EL0 watchpoints because the kernel can @@ -821,7 +818,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, /* If we're already stepping a breakpoint, just return. */ if (debug_info->bps_disabled) - return 0; + return; if (test_thread_flag(TIF_SINGLESTEP)) debug_info->suspended_step = 1; @@ -832,7 +829,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) - return 0; + return; if (kernel_active_single_step()) { *kernel_step = ARM_KERNEL_STEP_SUSPEND; @@ -841,44 +838,41 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, kernel_enable_single_step(regs); } } - - return 0; } -NOKPROBE_SYMBOL(watchpoint_handler); +NOKPROBE_SYMBOL(do_watchpoint); /* * Handle single-step exception. */ -int reinstall_suspended_bps(struct pt_regs *regs) +bool try_step_suspended_breakpoints(struct pt_regs *regs) { struct debug_info *debug_info = ¤t->thread.debug; - int handled_exception = 0, *kernel_step; - - kernel_step = this_cpu_ptr(&stepping_kernel_bp); + int *kernel_step = this_cpu_ptr(&stepping_kernel_bp); + bool handled_exception = false; /* - * Called from single-step exception handler. - * Return 0 if execution can resume, 1 if a SIGTRAP should be - * reported. + * Called from single-step exception entry. + * Return true if we stepped a breakpoint and can resume execution, + * false if we need to handle a single-step. */ if (user_mode(regs)) { if (debug_info->bps_disabled) { debug_info->bps_disabled = 0; toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1); - handled_exception = 1; + handled_exception = true; } if (debug_info->wps_disabled) { debug_info->wps_disabled = 0; toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1); - handled_exception = 1; + handled_exception = true; } if (handled_exception) { if (debug_info->suspended_step) { debug_info->suspended_step = 0; /* Allow exception handling to fall-through. */ - handled_exception = 0; + handled_exception = false; } else { user_disable_single_step(current); } @@ -892,17 +886,17 @@ int reinstall_suspended_bps(struct pt_regs *regs) if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) { kernel_disable_single_step(); - handled_exception = 1; + handled_exception = true; } else { - handled_exception = 0; + handled_exception = false; } *kernel_step = ARM_KERNEL_STEP_NONE; } - return !handled_exception; + return handled_exception; } -NOKPROBE_SYMBOL(reinstall_suspended_bps); +NOKPROBE_SYMBOL(try_step_suspended_breakpoints); /* * Context-switcher for restoring suspended breakpoints. @@ -987,12 +981,6 @@ static int __init arch_hw_breakpoint_init(void) pr_info("found %d breakpoint and %d watchpoint registers.\n", core_num_brps, core_num_wrps); - /* Register debug fault handlers. */ - hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, - TRAP_HWBKPT, "hw-breakpoint handler"); - hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, - TRAP_HWBKPT, "hw-watchpoint handler"); - /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 85087e2df564..c0065a1d77cf 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -51,7 +51,6 @@ static void init_irq_scs(void) scs_alloc(early_cpu_to_node(cpu)); } -#ifdef CONFIG_VMAP_STACK static void __init init_irq_stacks(void) { int cpu; @@ -62,18 +61,6 @@ static void __init init_irq_stacks(void) per_cpu(irq_stack_ptr, cpu) = p; } } -#else -/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ -DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); - -static void init_irq_stacks(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); -} -#endif #ifndef CONFIG_PREEMPT_RT static void ____do_softirq(struct pt_regs *regs) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index f3c4d3a8a20f..968324a79a89 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -234,23 +234,23 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, return err; } -static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr) +int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } -NOKPROBE_SYMBOL(kgdb_brk_fn) +NOKPROBE_SYMBOL(kgdb_brk_handler) -static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr) +int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr) { compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } -NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); +NOKPROBE_SYMBOL(kgdb_compiled_brk_handler); -static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) +int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr) { if (!kgdb_single_step) return DBG_HOOK_ERROR; @@ -258,21 +258,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } -NOKPROBE_SYMBOL(kgdb_step_brk_fn); - -static struct break_hook kgdb_brkpt_hook = { - .fn = kgdb_brk_fn, - .imm = KGDB_DYN_DBG_BRK_IMM, -}; - -static struct break_hook kgdb_compiled_brkpt_hook = { - .fn = kgdb_compiled_brk_fn, - .imm = KGDB_COMPILED_DBG_BRK_IMM, -}; - -static struct step_hook kgdb_step_hook = { - .fn = kgdb_step_brk_fn -}; +NOKPROBE_SYMBOL(kgdb_single_step_handler); static int __kgdb_notify(struct die_args *args, unsigned long cmd) { @@ -311,15 +297,7 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - int ret = register_die_notifier(&kgdb_notifier); - - if (ret != 0) - return ret; - - register_kernel_break_hook(&kgdb_brkpt_hook); - register_kernel_break_hook(&kgdb_compiled_brkpt_hook); - register_kernel_step_hook(&kgdb_step_hook); - return 0; + return register_die_notifier(&kgdb_notifier); } /* @@ -329,9 +307,6 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_kernel_break_hook(&kgdb_brkpt_hook); - unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook); - unregister_kernel_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 06bb680bfe97..40148d2725ce 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -23,6 +23,7 @@ #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> +#include <asm/text-patching.h> enum aarch64_reloc_op { RELOC_OP_NONE, @@ -48,7 +49,17 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val) return 0; } -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) +#define WRITE_PLACE(place, val, mod) do { \ + __typeof__(val) __val = (val); \ + \ + if (mod->state == MODULE_STATE_UNFORMED) \ + *(place) = __val; \ + else \ + aarch64_insn_copy(place, &(__val), sizeof(*place)); \ +} while (0) + +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len, + struct module *me) { s64 sval = do_reloc(op, place, val); @@ -66,7 +77,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) switch (len) { case 16: - *(s16 *)place = sval; + WRITE_PLACE((s16 *)place, sval, me); switch (op) { case RELOC_OP_ABS: if (sval < 0 || sval > U16_MAX) @@ -82,7 +93,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) } break; case 32: - *(s32 *)place = sval; + WRITE_PLACE((s32 *)place, sval, me); switch (op) { case RELOC_OP_ABS: if (sval < 0 || sval > U32_MAX) @@ -98,7 +109,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) } break; case 64: - *(s64 *)place = sval; + WRITE_PLACE((s64 *)place, sval, me); break; default: pr_err("Invalid length (%d) for data relocation\n", len); @@ -113,7 +124,8 @@ enum aarch64_insn_movw_imm_type { }; static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, - int lsb, enum aarch64_insn_movw_imm_type imm_type) + int lsb, enum aarch64_insn_movw_imm_type imm_type, + struct module *me) { u64 imm; s64 sval; @@ -145,7 +157,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, /* Update the instruction with the new encoding. */ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); - *place = cpu_to_le32(insn); + WRITE_PLACE(place, cpu_to_le32(insn), me); if (imm > U16_MAX) return -ERANGE; @@ -154,7 +166,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, } static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, - int lsb, int len, enum aarch64_insn_imm_type imm_type) + int lsb, int len, enum aarch64_insn_imm_type imm_type, + struct module *me) { u64 imm, imm_mask; s64 sval; @@ -170,7 +183,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, /* Update the instruction's immediate field. */ insn = aarch64_insn_encode_immediate(imm_type, insn, imm); - *place = cpu_to_le32(insn); + WRITE_PLACE(place, cpu_to_le32(insn), me); /* * Extract the upper value bits (including the sign bit) and @@ -189,17 +202,17 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, } static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs, - __le32 *place, u64 val) + __le32 *place, u64 val, struct module *me) { u32 insn; if (!is_forbidden_offset_for_adrp(place)) return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21, - AARCH64_INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR, me); /* patch ADRP to ADR if it is in range */ if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21, - AARCH64_INSN_IMM_ADR)) { + AARCH64_INSN_IMM_ADR, me)) { insn = le32_to_cpu(*place); insn &= ~BIT(31); } else { @@ -211,7 +224,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs, AARCH64_INSN_BRANCH_NOLINK); } - *place = cpu_to_le32(insn); + WRITE_PLACE(place, cpu_to_le32(insn), me); return 0; } @@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Data relocations. */ case R_AARCH64_ABS64: overflow_check = false; - ovf = reloc_data(RELOC_OP_ABS, loc, val, 64); + ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, me); break; case R_AARCH64_ABS32: - ovf = reloc_data(RELOC_OP_ABS, loc, val, 32); + ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, me); break; case R_AARCH64_ABS16: - ovf = reloc_data(RELOC_OP_ABS, loc, val, 16); + ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, me); break; case R_AARCH64_PREL64: overflow_check = false; - ovf = reloc_data(RELOC_OP_PREL, loc, val, 64); + ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, me); break; case R_AARCH64_PREL32: - ovf = reloc_data(RELOC_OP_PREL, loc, val, 32); + ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, me); break; case R_AARCH64_PREL16: - ovf = reloc_data(RELOC_OP_PREL, loc, val, 16); + ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, me); break; /* MOVW instruction relocations. */ @@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, fallthrough; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; fallthrough; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; fallthrough; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_SABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_SABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; /* Immediate instruction relocations. */ case R_AARCH64_LD_PREL_LO19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - AARCH64_INSN_IMM_19); + AARCH64_INSN_IMM_19, me); break; case R_AARCH64_ADR_PREL_LO21: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, - AARCH64_INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR, me); break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; fallthrough; case R_AARCH64_ADR_PREL_PG_HI21: - ovf = reloc_insn_adrp(me, sechdrs, loc, val); + ovf = reloc_insn_adrp(me, sechdrs, loc, val, me); if (ovf && ovf != -ERANGE) return ovf; break; @@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST16_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST32_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST64_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST128_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_TSTBR14: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, - AARCH64_INSN_IMM_14); + AARCH64_INSN_IMM_14, me); break; case R_AARCH64_CONDBR19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - AARCH64_INSN_IMM_19); + AARCH64_INSN_IMM_19, me); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, - AARCH64_INSN_IMM_26); + AARCH64_INSN_IMM_26, me); if (ovf == -ERANGE) { val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym); if (!val) return -ENOEXEC; ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, - 26, AARCH64_INSN_IMM_26); + 26, AARCH64_INSN_IMM_26, me); } break; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 2fbfd27ff5f2..e5e773844889 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -200,7 +200,7 @@ static void mte_update_sctlr_user(struct task_struct *task) * program requested values go with what was requested. */ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; - sctlr &= ~SCTLR_EL1_TCF0_MASK; + sctlr &= ~(SCTLR_EL1_TCF0_MASK | SCTLR_EL1_TCSO0_MASK); /* * Pick an actual setting. The order in which we check for * set bits and map into register values determines our @@ -212,6 +212,10 @@ static void mte_update_sctlr_user(struct task_struct *task) sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC); else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC); + + if (mte_ctrl & MTE_CTRL_STORE_ONLY) + sctlr |= SYS_FIELD_PREP(SCTLR_EL1, TCSO0, 1); + task->thread.sctlr_user = sctlr; } @@ -371,6 +375,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) (arg & PR_MTE_TCF_SYNC)) mte_ctrl |= MTE_CTRL_TCF_ASYMM; + if (arg & PR_MTE_STORE_ONLY) + mte_ctrl |= MTE_CTRL_STORE_ONLY; + task->thread.mte_ctrl = mte_ctrl; if (task == current) { preempt_disable(); @@ -398,6 +405,8 @@ long get_mte_ctrl(struct task_struct *task) ret |= PR_MTE_TCF_ASYNC; if (mte_ctrl & MTE_CTRL_TCF_SYNC) ret |= PR_MTE_TCF_SYNC; + if (mte_ctrl & MTE_CTRL_STORE_ONLY) + ret |= PR_MTE_STORE_ONLY; return ret; } diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile index 4d11a8c29181..be92d73c25b2 100644 --- a/arch/arm64/kernel/pi/Makefile +++ b/arch/arm64/kernel/pi/Makefile @@ -2,7 +2,7 @@ # Copyright 2022 Google LLC KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ - -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_KSTACK_ERASE) \ $(DISABLE_LATENT_ENTROPY_PLUGIN) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ @@ -41,4 +41,4 @@ obj-y := idreg-override.pi.o \ obj-$(CONFIG_RELOCATABLE) += relocate.pi.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o -extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) +targets := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d9e462eafb95..0c5d408afd95 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -292,8 +292,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -static int __kprobes -kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) +int __kprobes +kprobe_brk_handler(struct pt_regs *regs, unsigned long esr) { struct kprobe *p, *cur_kprobe; struct kprobe_ctlblk *kcb; @@ -336,13 +336,8 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -static struct break_hook kprobes_break_hook = { - .imm = KPROBES_BRK_IMM, - .fn = kprobe_breakpoint_handler, -}; - -static int __kprobes -kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) +int __kprobes +kprobe_ss_brk_handler(struct pt_regs *regs, unsigned long esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long addr = instruction_pointer(regs); @@ -360,13 +355,8 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_ERROR; } -static struct break_hook kprobes_break_ss_hook = { - .imm = KPROBES_BRK_SS_IMM, - .fn = kprobe_breakpoint_ss_handler, -}; - -static int __kprobes -kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) +int __kprobes +kretprobe_brk_handler(struct pt_regs *regs, unsigned long esr) { if (regs->pc != (unsigned long)__kretprobe_trampoline) return DBG_HOOK_ERROR; @@ -375,11 +365,6 @@ kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -static struct break_hook kretprobes_break_hook = { - .imm = KRETPROBES_BRK_IMM, - .fn = kretprobe_breakpoint_handler, -}; - /* * Provide a blacklist of symbols identifying ranges which cannot be kprobed. * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). @@ -422,9 +407,5 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { - register_kernel_break_hook(&kprobes_break_hook); - register_kernel_break_hook(&kprobes_break_ss_hook); - register_kernel_break_hook(&kretprobes_break_hook); - return 0; } diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index a362f3dbb3d1..b60739d3983f 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -12,7 +12,7 @@ SYM_CODE_START(__kretprobe_trampoline) /* * Trigger a breakpoint exception. The PC will be adjusted by - * kretprobe_breakpoint_handler(), and no subsequent instructions will + * kretprobe_brk_handler(), and no subsequent instructions will * be executed from the trampoline. */ brk #KRETPROBES_BRK_IMM diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index cb3d05af36e3..1f91fd2a8187 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -173,7 +173,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, return NOTIFY_DONE; } -static int uprobe_breakpoint_handler(struct pt_regs *regs, +int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr) { if (uprobe_pre_sstep_notifier(regs)) @@ -182,7 +182,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs, return DBG_HOOK_ERROR; } -static int uprobe_single_step_handler(struct pt_regs *regs, +int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr) { struct uprobe_task *utask = current->utask; @@ -194,23 +194,3 @@ static int uprobe_single_step_handler(struct pt_regs *regs, return DBG_HOOK_ERROR; } -/* uprobe breakpoint handler hook */ -static struct break_hook uprobes_break_hook = { - .imm = UPROBES_BRK_IMM, - .fn = uprobe_breakpoint_handler, -}; - -/* uprobe single step handler hook */ -static struct step_hook uprobes_step_hook = { - .fn = uprobe_single_step_handler, -}; - -static int __init arch_init_uprobes(void) -{ - register_user_break_hook(&uprobes_break_hook); - register_user_step_hook(&uprobes_step_hook); - - return 0; -} - -device_initcall(arch_init_uprobes); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 08b7042a2e2d..96482a1412c6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -307,13 +307,13 @@ static int copy_thread_gcs(struct task_struct *p, p->thread.gcs_base = 0; p->thread.gcs_size = 0; + p->thread.gcs_el0_mode = current->thread.gcs_el0_mode; + p->thread.gcs_el0_locked = current->thread.gcs_el0_locked; + gcs = gcs_alloc_thread_stack(p, args); if (IS_ERR_VALUE(gcs)) return PTR_ERR((void *)gcs); - p->thread.gcs_el0_mode = current->thread.gcs_el0_mode; - p->thread.gcs_el0_locked = current->thread.gcs_el0_locked; - return 0; } @@ -341,7 +341,6 @@ void flush_thread(void) void arch_release_task_struct(struct task_struct *tsk) { fpsimd_release_task(tsk); - gcs_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -856,10 +855,14 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) if (is_compat_thread(ti)) return -EINVAL; - if (system_supports_mte()) + if (system_supports_mte()) { valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \ | PR_MTE_TAG_MASK; + if (cpus_have_cap(ARM64_MTE_STORE_ONLY)) + valid_mask |= PR_MTE_STORE_ONLY; + } + if (arg & ~valid_mask) return -EINVAL; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index ee94b72bf8fb..4b001121c72d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1586,7 +1586,7 @@ enum aarch64_regset { static const struct user_regset aarch64_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_pt_regs) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), @@ -1594,7 +1594,7 @@ static const struct user_regset aarch64_regsets[] = { .set = gpr_set }, [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct user_fpsimd_state) / sizeof(u32), /* * We pretend we have 32-bit registers because the fpsr and @@ -1607,7 +1607,7 @@ static const struct user_regset aarch64_regsets[] = { .set = fpr_set }, [REGSET_TLS] = { - .core_note_type = NT_ARM_TLS, + USER_REGSET_NOTE_TYPE(ARM_TLS), .n = 2, .size = sizeof(void *), .align = sizeof(void *), @@ -1616,7 +1616,7 @@ static const struct user_regset aarch64_regsets[] = { }, #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { - .core_note_type = NT_ARM_HW_BREAK, + USER_REGSET_NOTE_TYPE(ARM_HW_BREAK), .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1624,7 +1624,7 @@ static const struct user_regset aarch64_regsets[] = { .set = hw_break_set, }, [REGSET_HW_WATCH] = { - .core_note_type = NT_ARM_HW_WATCH, + USER_REGSET_NOTE_TYPE(ARM_HW_WATCH), .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1633,7 +1633,7 @@ static const struct user_regset aarch64_regsets[] = { }, #endif [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_ARM_SYSTEM_CALL, + USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL), .n = 1, .size = sizeof(int), .align = sizeof(int), @@ -1641,7 +1641,7 @@ static const struct user_regset aarch64_regsets[] = { .set = system_call_set, }, [REGSET_FPMR] = { - .core_note_type = NT_ARM_FPMR, + USER_REGSET_NOTE_TYPE(ARM_FPMR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), @@ -1650,7 +1650,7 @@ static const struct user_regset aarch64_regsets[] = { }, #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ - .core_note_type = NT_ARM_SVE, + USER_REGSET_NOTE_TYPE(ARM_SVE), .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX, SVE_PT_REGS_SVE), SVE_VQ_BYTES), @@ -1662,7 +1662,7 @@ static const struct user_regset aarch64_regsets[] = { #endif #ifdef CONFIG_ARM64_SME [REGSET_SSVE] = { /* Streaming mode SVE */ - .core_note_type = NT_ARM_SSVE, + USER_REGSET_NOTE_TYPE(ARM_SSVE), .n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, @@ -1671,7 +1671,7 @@ static const struct user_regset aarch64_regsets[] = { .set = ssve_set, }, [REGSET_ZA] = { /* SME ZA */ - .core_note_type = NT_ARM_ZA, + USER_REGSET_NOTE_TYPE(ARM_ZA), /* * ZA is a single register but it's variably sized and * the ptrace core requires that the size of any data @@ -1687,7 +1687,7 @@ static const struct user_regset aarch64_regsets[] = { .set = za_set, }, [REGSET_ZT] = { /* SME ZT */ - .core_note_type = NT_ARM_ZT, + USER_REGSET_NOTE_TYPE(ARM_ZT), .n = 1, .size = ZT_SIG_REG_BYTES, .align = sizeof(u64), @@ -1697,7 +1697,7 @@ static const struct user_regset aarch64_regsets[] = { #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { - .core_note_type = NT_ARM_PAC_MASK, + USER_REGSET_NOTE_TYPE(ARM_PAC_MASK), .n = sizeof(struct user_pac_mask) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), @@ -1705,7 +1705,7 @@ static const struct user_regset aarch64_regsets[] = { /* this cannot be set dynamically */ }, [REGSET_PAC_ENABLED_KEYS] = { - .core_note_type = NT_ARM_PAC_ENABLED_KEYS, + USER_REGSET_NOTE_TYPE(ARM_PAC_ENABLED_KEYS), .n = 1, .size = sizeof(long), .align = sizeof(long), @@ -1714,7 +1714,7 @@ static const struct user_regset aarch64_regsets[] = { }, #ifdef CONFIG_CHECKPOINT_RESTORE [REGSET_PACA_KEYS] = { - .core_note_type = NT_ARM_PACA_KEYS, + USER_REGSET_NOTE_TYPE(ARM_PACA_KEYS), .n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t), .size = sizeof(__uint128_t), .align = sizeof(__uint128_t), @@ -1722,7 +1722,7 @@ static const struct user_regset aarch64_regsets[] = { .set = pac_address_keys_set, }, [REGSET_PACG_KEYS] = { - .core_note_type = NT_ARM_PACG_KEYS, + USER_REGSET_NOTE_TYPE(ARM_PACG_KEYS), .n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t), .size = sizeof(__uint128_t), .align = sizeof(__uint128_t), @@ -1733,7 +1733,7 @@ static const struct user_regset aarch64_regsets[] = { #endif #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI [REGSET_TAGGED_ADDR_CTRL] = { - .core_note_type = NT_ARM_TAGGED_ADDR_CTRL, + USER_REGSET_NOTE_TYPE(ARM_TAGGED_ADDR_CTRL), .n = 1, .size = sizeof(long), .align = sizeof(long), @@ -1743,7 +1743,7 @@ static const struct user_regset aarch64_regsets[] = { #endif #ifdef CONFIG_ARM64_POE [REGSET_POE] = { - .core_note_type = NT_ARM_POE, + USER_REGSET_NOTE_TYPE(ARM_POE), .n = 1, .size = sizeof(long), .align = sizeof(long), @@ -1753,7 +1753,7 @@ static const struct user_regset aarch64_regsets[] = { #endif #ifdef CONFIG_ARM64_GCS [REGSET_GCS] = { - .core_note_type = NT_ARM_GCS, + USER_REGSET_NOTE_TYPE(ARM_GCS), .n = sizeof(struct user_gcs) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), @@ -1943,7 +1943,7 @@ static int compat_tls_set(struct task_struct *target, static const struct user_regset aarch32_regsets[] = { [REGSET_COMPAT_GPR] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = COMPAT_ELF_NGREG, .size = sizeof(compat_elf_greg_t), .align = sizeof(compat_elf_greg_t), @@ -1951,7 +1951,7 @@ static const struct user_regset aarch32_regsets[] = { .set = compat_gpr_set }, [REGSET_COMPAT_VFP] = { - .core_note_type = NT_ARM_VFP, + USER_REGSET_NOTE_TYPE(ARM_VFP), .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), @@ -1968,7 +1968,7 @@ static const struct user_regset_view user_aarch32_view = { static const struct user_regset aarch32_ptrace_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = COMPAT_ELF_NGREG, .size = sizeof(compat_elf_greg_t), .align = sizeof(compat_elf_greg_t), @@ -1976,7 +1976,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .set = compat_gpr_set }, [REGSET_FPR] = { - .core_note_type = NT_ARM_VFP, + USER_REGSET_NOTE_TYPE(ARM_VFP), .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), @@ -1984,7 +1984,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .set = compat_vfp_set }, [REGSET_TLS] = { - .core_note_type = NT_ARM_TLS, + USER_REGSET_NOTE_TYPE(ARM_TLS), .n = 1, .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), @@ -1993,7 +1993,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { }, #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { - .core_note_type = NT_ARM_HW_BREAK, + USER_REGSET_NOTE_TYPE(ARM_HW_BREAK), .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -2001,7 +2001,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .set = hw_break_set, }, [REGSET_HW_WATCH] = { - .core_note_type = NT_ARM_HW_WATCH, + USER_REGSET_NOTE_TYPE(ARM_HW_WATCH), .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -2010,7 +2010,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { }, #endif [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_ARM_SYSTEM_CALL, + USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL), .n = 1, .size = sizeof(int), .align = sizeof(int), diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 255d12f881c2..6f24a0251e18 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -34,10 +34,8 @@ unsigned long sdei_exit_mode; DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); -#ifdef CONFIG_VMAP_STACK DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); -#endif DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr); DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); @@ -65,8 +63,7 @@ static void free_sdei_stacks(void) { int cpu; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return; + BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); for_each_possible_cpu(cpu) { _free_sdei_stack(&sdei_stack_normal_ptr, cpu); @@ -91,8 +88,7 @@ static int init_sdei_stacks(void) int cpu; int err = 0; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return 0; + BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); for_each_possible_cpu(cpu) { err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 417140cd399b..db3f972f8cd9 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -95,8 +95,11 @@ static void save_reset_user_access_state(struct user_access_state *ua_state) ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); write_sysreg_s(por_enable_all, SYS_POR_EL0); - /* Ensure that any subsequent uaccess observes the updated value */ - isb(); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. + */ } } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 1d9d51d7627f..3ebcf8c53fb0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -152,6 +152,8 @@ kunwind_recover_return_address(struct kunwind_state *state) orig_pc = kretprobe_find_ret_addr(state->task, (void *)state->common.fp, &state->kr_cur); + if (!orig_pc) + return -EINVAL; state->common.pc = orig_pc; state->flags.kretprobe = 1; } @@ -277,21 +279,24 @@ kunwind_next(struct kunwind_state *state) typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie); -static __always_inline void +static __always_inline int do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, void *cookie) { - if (kunwind_recover_return_address(state)) - return; + int ret; - while (1) { - int ret; + ret = kunwind_recover_return_address(state); + if (ret) + return ret; + while (1) { if (!consume_state(state, cookie)) - break; + return -EINVAL; ret = kunwind_next(state); + if (ret == -ENOENT) + return 0; if (ret < 0) - break; + return ret; } } @@ -324,7 +329,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, : stackinfo_get_unknown(); \ }) -static __always_inline void +static __always_inline int kunwind_stack_walk(kunwind_consume_fn consume_state, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -332,10 +337,8 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, struct stack_info stacks[] = { stackinfo_get_task(task), STACKINFO_CPU(irq), -#if defined(CONFIG_VMAP_STACK) STACKINFO_CPU(overflow), -#endif -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE) +#if defined(CONFIG_ARM_SDE_INTERFACE) STACKINFO_SDEI(normal), STACKINFO_SDEI(critical), #endif @@ -352,7 +355,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, if (regs) { if (task != current) - return; + return -EINVAL; kunwind_init_from_regs(&state, regs); } else if (task == current) { kunwind_init_from_caller(&state); @@ -360,7 +363,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, kunwind_init_from_task(&state, task); } - do_kunwind(&state, consume_state, cookie); + return do_kunwind(&state, consume_state, cookie); } struct kunwind_consume_entry_data { @@ -387,6 +390,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs); } +static __always_inline bool +arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie) +{ + /* + * At an exception boundary we can reliably consume the saved PC. We do + * not know whether the LR was live when the exception was taken, and + * so we cannot perform the next unwind step reliably. + * + * All that matters is whether the *entire* unwind is reliable, so give + * up as soon as we hit an exception boundary. + */ + if (state->source == KUNWIND_SOURCE_REGS_PC) + return false; + + return arch_kunwind_consume_entry(state, cookie); +} + +noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, + struct task_struct *task) +{ + struct kunwind_consume_entry_data data = { + .consume_entry = consume_entry, + .cookie = cookie, + }; + + return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data, + task, NULL); +} + struct bpf_unwind_consume_entry_data { bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp); void *cookie; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9bfa5c944379..f528b6041f6a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -454,7 +454,7 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr) u32 insn; /* check for AArch32 breakpoint instructions */ - if (!aarch32_break_handler(regs)) + if (try_handle_aarch32_break(regs)) return; if (user_insn_read(regs, &insn)) @@ -894,8 +894,6 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr) "Bad EL0 synchronous exception"); } -#ifdef CONFIG_VMAP_STACK - DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); @@ -927,10 +925,10 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne nmi_panic(NULL, "kernel stack overflow"); cpu_park_loop(); } -#endif void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr) { + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); console_verbose(); pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n", @@ -987,7 +985,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr) int is_valid_bugaddr(unsigned long addr) { /* - * bug_handler() only called for BRK #BUG_BRK_IMM. + * bug_brk_handler() only called for BRK #BUG_BRK_IMM. * So the answer is trivial -- any spurious instances with no * bug table entry will be rejected by report_bug() and passed * back to the debug-monitors code and handled as a fatal @@ -997,7 +995,7 @@ int is_valid_bugaddr(unsigned long addr) } #endif -static int bug_handler(struct pt_regs *regs, unsigned long esr) +int bug_brk_handler(struct pt_regs *regs, unsigned long esr) { switch (report_bug(regs->pc, regs)) { case BUG_TRAP_TYPE_BUG: @@ -1017,13 +1015,8 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -static struct break_hook bug_break_hook = { - .fn = bug_handler, - .imm = BUG_BRK_IMM, -}; - #ifdef CONFIG_CFI_CLANG -static int cfi_handler(struct pt_regs *regs, unsigned long esr) +int cfi_brk_handler(struct pt_regs *regs, unsigned long esr) { unsigned long target; u32 type; @@ -1046,15 +1039,9 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); return DBG_HOOK_HANDLED; } - -static struct break_hook cfi_break_hook = { - .fn = cfi_handler, - .imm = CFI_BRK_IMM_BASE, - .mask = CFI_BRK_IMM_MASK, -}; #endif /* CONFIG_CFI_CLANG */ -static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) +int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr) { pr_err("%s generated an invalid instruction at %pS!\n", "Kernel text patching", @@ -1064,11 +1051,6 @@ static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_ERROR; } -static struct break_hook fault_break_hook = { - .fn = reserved_fault_handler, - .imm = FAULT_BRK_IMM, -}; - #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_ESR_RECOVER 0x20 @@ -1076,7 +1058,7 @@ static struct break_hook fault_break_hook = { #define KASAN_ESR_SIZE_MASK 0x0f #define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK)) -static int kasan_handler(struct pt_regs *regs, unsigned long esr) +int kasan_brk_handler(struct pt_regs *regs, unsigned long esr) { bool recover = esr & KASAN_ESR_RECOVER; bool write = esr & KASAN_ESR_WRITE; @@ -1107,62 +1089,12 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); return DBG_HOOK_HANDLED; } - -static struct break_hook kasan_break_hook = { - .fn = kasan_handler, - .imm = KASAN_BRK_IMM, - .mask = KASAN_BRK_MASK, -}; #endif #ifdef CONFIG_UBSAN_TRAP -static int ubsan_handler(struct pt_regs *regs, unsigned long esr) +int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr) { die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr); return DBG_HOOK_HANDLED; } - -static struct break_hook ubsan_break_hook = { - .fn = ubsan_handler, - .imm = UBSAN_BRK_IMM, - .mask = UBSAN_BRK_MASK, -}; -#endif - -/* - * Initial handler for AArch64 BRK exceptions - * This handler only used until debug_traps_init(). - */ -int __init early_brk64(unsigned long addr, unsigned long esr, - struct pt_regs *regs) -{ -#ifdef CONFIG_CFI_CLANG - if (esr_is_cfi_brk(esr)) - return cfi_handler(regs, esr) != DBG_HOOK_HANDLED; -#endif -#ifdef CONFIG_KASAN_SW_TAGS - if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) - return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; -#endif -#ifdef CONFIG_UBSAN_TRAP - if (esr_is_ubsan_brk(esr)) - return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED; -#endif - return bug_handler(regs, esr) != DBG_HOOK_HANDLED; -} - -void __init trap_init(void) -{ - register_kernel_break_hook(&bug_break_hook); -#ifdef CONFIG_CFI_CLANG - register_kernel_break_hook(&cfi_break_hook); -#endif - register_kernel_break_hook(&fault_break_hook); -#ifdef CONFIG_KASAN_SW_TAGS - register_kernel_break_hook(&kasan_break_hook); -#endif -#ifdef CONFIG_UBSAN_TRAP - register_kernel_break_hook(&ubsan_break_hook); #endif - debug_traps_init(); -} diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 5e27e46aa496..7dec05dd33b7 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -36,7 +36,8 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO # -Wmissing-prototypes and -Wmissing-declarations are removed from # the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled. CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ - $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ + $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \ + $(GCC_PLUGINS_CFLAGS) \ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ -Wmissing-prototypes -Wmissing-declarations diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index dcd25322127c..3093037dcb7b 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -34,3 +34,61 @@ bool __init arch_perf_nmi_is_available(void) */ return arm_pmu_irq_is_nmi(); } + +static int watchdog_perf_update_period(void *data) +{ + int cpu = smp_processor_id(); + u64 max_cpu_freq, new_period; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + return 0; + + new_period = watchdog_thresh * max_cpu_freq; + hardlockup_detector_perf_adjust_period(new_period); + + return 0; +} + +static int watchdog_freq_notifier_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = data; + int cpu; + + if (val != CPUFREQ_CREATE_POLICY) + return NOTIFY_DONE; + + /* + * Let each online CPU related to the policy update the period by their + * own. This will serialize with the framework on start/stop the lockup + * detector (softlockup_{start,stop}_all) and avoid potential race + * condition. Otherwise we may have below theoretical race condition: + * (core 0/1 share the same policy) + * [core 0] [core 1] + * hardlockup_detector_event_create() + * hw_nmi_get_sample_period() + * (cpufreq registered, notifier callback invoked) + * watchdog_freq_notifier_callback() + * watchdog_perf_update_period() + * (since core 1's event's not yet created, + * the period is not set) + * perf_event_create_kernel_counter() + * (event's period is SAFE_MAX_CPU_FREQ) + */ + for_each_cpu(cpu, policy->cpus) + smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false); + + return NOTIFY_DONE; +} + +static struct notifier_block watchdog_freq_notifier = { + .notifier_call = watchdog_freq_notifier_callback, +}; + +static int __init init_watchdog_freq_notifier(void) +{ + return cpufreq_register_notifier(&watchdog_freq_notifier, + CPUFREQ_POLICY_NOTIFIER); +} +core_initcall(init_watchdog_freq_notifier); |