diff options
Diffstat (limited to 'arch/powerpc/kernel')
140 files changed, 7464 insertions, 6411 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9b6146056e48..2f0a2e69c607 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. # -ifdef CONFIG_PPC64 -CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) -endif ifdef CONFIG_PPC32 CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC @@ -54,6 +51,12 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif +KCSAN_SANITIZE_early_32.o := n +KCSAN_SANITIZE_cputable.o := n +KCSAN_SANITIZE_btext.o := n +KCSAN_SANITIZE_paca.o := n +KCSAN_SANITIZE_setup_64.o := n + #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET # Remove stack protector to avoid triggering unneeded stack canary # checks due to randomize_kstack_offset. @@ -61,13 +64,13 @@ CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector #endif -obj-y := cputable.o syscalls.o \ +obj-y := cputable.o syscalls.o switch.o \ irq.o align.o signal_$(BITS).o pmc.o vdso.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o firmware.o \ + prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ kdebugfs.o stacktrace.o syscall.o obj-y += ptrace/ @@ -81,6 +84,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o @@ -119,11 +123,10 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_PPC64) += head_64.o obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o -obj-$(CONFIG_40x) += head_40x.o obj-$(CONFIG_44x) += head_44x.o obj-$(CONFIG_PPC_8xx) += head_8xx.o obj-$(CONFIG_PPC_85xx) += head_85xx.o -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o @@ -136,6 +139,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o @@ -148,8 +152,6 @@ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o - obj-y += trace/ ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) @@ -158,12 +160,7 @@ endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o -obj-$(CONFIG_PPC64) += $(obj64-y) -obj-$(CONFIG_PPC32) += $(obj32-y) - -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),) obj-y += ppc_save_regs.o -endif obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o @@ -177,16 +174,16 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n +KCSAN_SANITIZE_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n +KCSAN_SANITIZE_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n +KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n -GCOV_PROFILE_syscall_64.o := n -KCOV_INSTRUMENT_syscall_64.o := n -UBSAN_SANITIZE_syscall_64.o := n UBSAN_SANITIZE_vdso.o := n # Necessary for booting with kcov enabled on book3e machines @@ -199,10 +196,15 @@ CFLAGS_paca.o += -fno-stack-protector obj-$(CONFIG_PPC_FPU) += fpu.o obj-$(CONFIG_ALTIVEC) += vector.o -obj-$(CONFIG_PPC64) += entry_64.o -obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o +obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o +ifdef KBUILD_BUILTIN +always-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +endif + +obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) quiet_cmd_prom_init_check = PROMCHK $@ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d24a59a98c0c..a4bc80b30410 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -8,6 +8,7 @@ * compile this file to assembler, and then extract the * #defines from the assembly-language output. */ +#define COMPILE_OFFSETS #include <linux/compat.h> #include <linux/signal.h> @@ -54,7 +55,7 @@ #endif #ifdef CONFIG_PPC32 -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE #include "head_booke.h" #endif #endif @@ -185,7 +186,9 @@ int main(void) offsetof(struct task_struct, thread_info)); OFFSET(PACASAVEDMSR, paca_struct, saved_msr); OFFSET(PACAR1, paca_struct, saved_r1); +#ifndef CONFIG_PPC_KERNEL_PCREL OFFSET(PACATOC, paca_struct, kernel_toc); +#endif OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); #ifdef CONFIG_PPC_BOOK3S_64 @@ -244,9 +247,7 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); -#ifdef CONFIG_PPC64 OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1); -#endif #ifdef CONFIG_PPC_BOOK3E_64 OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif @@ -334,7 +335,6 @@ int main(void) #endif /* ! CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); @@ -594,7 +594,6 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); - DEFINE(IPI_PRIORITY, IPI_PRIORITY); OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); @@ -674,5 +673,16 @@ int main(void) DEFINE(BPT_SIZE, BPT_SIZE); #endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif +#endif + return 0; } diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c index 1bcfca5fdf67..92298d6a3a37 100644 --- a/arch/powerpc/kernel/audit.c +++ b/arch/powerpc/kernel/audit.c @@ -4,6 +4,8 @@ #include <linux/audit.h> #include <asm/unistd.h> +#include "audit_32.h" + static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> ~0U @@ -41,7 +43,6 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_PPC64 - extern int ppc32_classify_syscall(unsigned); if (abi == AUDIT_ARCH_PPC) return ppc32_classify_syscall(syscall); #endif diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h new file mode 100644 index 000000000000..c6c79c3041ab --- /dev/null +++ b/arch/powerpc/kernel/audit_32.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __AUDIT_32_H__ +#define __AUDIT_32_H__ + +extern int ppc32_classify_syscall(unsigned); + +#endif diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 2769889219bf..7f63f1cdc6c3 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/font.h> #include <linux/memblock.h> #include <linux/pgtable.h> #include <linux/of.h> @@ -41,10 +42,6 @@ static unsigned char *logicalDisplayBase __force_data; unsigned long disp_BAT[2] __initdata = {0, 0}; -#define cmapsz (16*256) - -static unsigned char vga_font[cmapsz]; - static int boot_text_mapped __force_data; extern void rmci_on(void); @@ -235,7 +232,7 @@ int __init btext_find_display(int allow_nonstdout) return rc; for_each_node_by_type(np, "display") { - if (of_get_property(np, "linux,opened", NULL)) { + if (of_property_read_bool(np, "linux,opened")) { printk("trying %pOF ...\n", np); rc = btext_initialize(np); printk("result: %d\n", rc); @@ -407,7 +404,7 @@ static unsigned int expand_bits_16[4] = { }; -static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xFFFFFFFFUL; @@ -428,7 +425,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_16(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xFFFFFFFFUL; @@ -446,7 +443,7 @@ static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_8(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0x0F0F0F0FUL; @@ -465,7 +462,8 @@ static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) static noinline void draw_byte(unsigned char c, long locX, long locY) { unsigned char *base = calc_base(locX << 3, locY << 4); - unsigned char *font = &vga_font[((unsigned int)c) * 16]; + unsigned int font_index = c * 16; + const unsigned char *font = font_sun_8x16.data + font_index; int rb = dispDeviceRowBytes; rmci_maybe_on(); @@ -583,349 +581,3 @@ void __init udbg_init_btext(void) */ udbg_putc = btext_drawchar; } - -static unsigned char vga_font[cmapsz] = { -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, -0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, -0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, -0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, -0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, -0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, -0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, -0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, -0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, -0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, -0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, -0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, -0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, -0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, -0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, -0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, -0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, -0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, -0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, -0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, -0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, -0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, -0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, -0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, -0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, -0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, -0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, -0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, -0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, -0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, -0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, -0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, -0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, -0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, -0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, -0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, -0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, -0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, -0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, -0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, -0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, -0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, -0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, -0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, -0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, -0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, -0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, -0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, -0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, -0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, -0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, -0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, -0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, -0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, -0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, -0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, -0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, -0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, -0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, -0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, -0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, -0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, -0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, -0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, -0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, -0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, -0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, -0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, -0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, -0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, -0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, -0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, -0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, -0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, -0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, -0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, -0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, -}; - diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index f502337dd37d..0fcc463b02e2 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -735,7 +735,7 @@ static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; -static struct kobj_type cache_index_type = { +static const struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, .default_groups = cache_index_default_groups, diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c index d92ffe4e5dc1..57b38c592b9f 100644 --- a/arch/powerpc/kernel/compat_audit.c +++ b/arch/powerpc/kernel/compat_audit.c @@ -3,6 +3,8 @@ #include <linux/audit_arch.h> #include <asm/unistd.h> +#include "audit_32.h" + unsigned ppc32_dir_class[] = { #include <asm-generic/audit_dir_write.h> ~0U diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f29ce3dd6140..ab3ca74e6730 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -26,6 +26,15 @@ BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) bl setup_common_caches + + /* + * This assumes that all cores using __setup_cpu_603 with + * MMU_FTR_USE_HIGH_BATS are G2_LE compatible + */ +BEGIN_MMU_FTR_SECTION + bl setup_g2_le_hid2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + mtlr r5 blr _GLOBAL(__setup_cpu_604) @@ -115,6 +124,16 @@ SYM_FUNC_START_LOCAL(setup_604_hid0) blr SYM_FUNC_END(setup_604_hid0) +/* Enable high BATs for G2_LE and derivatives like e300cX */ +SYM_FUNC_START_LOCAL(setup_g2_le_hid2) + mfspr r11,SPRN_HID2_G2_LE + oris r11,r11,HID2_G2_LE_HBE@h + mtspr SPRN_HID2_G2_LE,r11 + sync + isync + blr +SYM_FUNC_END(setup_g2_le_hid2) + /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. * Moto MPC710CE.pdf describes them, those are errata @@ -382,7 +401,7 @@ _GLOBAL(__save_cpu_setup) andi. r3,r3,0xff00 cmpwi cr0,r3,0x0200 bne 1f - mfspr r4,SPRN_HID2 + mfspr r4,SPRN_HID2_750FX stw r4,CS_HID2(r5) 1: mtcr r7 @@ -477,7 +496,7 @@ _GLOBAL(__restore_cpu_setup) bne 4f lwz r4,CS_HID2(r5) rlwinm r4,r4,0,19,17 - mtspr SPRN_HID2,r4 + mtspr SPRN_HID2_750FX,r4 sync 4: lwz r4,CS_HID1(r5) @@ -495,4 +514,3 @@ _GLOBAL(__restore_cpu_setup) mtcr r7 blr _ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) - diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 097c033668f0..98bd4e6c1770 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -126,6 +126,12 @@ static void init_PMU_ISA31(void) mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } +static void init_DEXCR(void) +{ + mtspr(SPRN_DEXCR, DEXCR_INIT); + mtspr(SPRN_HASHKEYR, 0); +} + /* * Note that we can be called twice of pseudo-PVRs. * The parameter offset is not used. @@ -241,6 +247,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) init_FSCR_power10(); init_PMU(); init_PMU_ISA31(); + init_DEXCR(); if (!init_hvmode_206(t)) return; @@ -263,6 +270,7 @@ void __restore_cpu_power10(void) init_FSCR_power10(); init_PMU(); init_PMU_ISA31(); + init_DEXCR(); msr = mfmsr(); if (!(msr & MSR_HV)) diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h index 85ded3f77204..5ea14605bb41 100644 --- a/arch/powerpc/kernel/cpu_specs.h +++ b/arch/powerpc/kernel/cpu_specs.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifdef CONFIG_40x -#include "cpu_specs_40x.h" -#endif - #ifdef CONFIG_PPC_47x #include "cpu_specs_47x.h" #elif defined(CONFIG_44x) diff --git a/arch/powerpc/kernel/cpu_specs_40x.h b/arch/powerpc/kernel/cpu_specs_40x.h deleted file mode 100644 index a1362a75b8c8..000000000000 --- a/arch/powerpc/kernel/cpu_specs_40x.h +++ /dev/null @@ -1,280 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) - */ - -static struct cpu_spec cpu_specs[] __initdata = { - { /* STB 04xxx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41810000, - .cpu_name = "STB04xxx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP405L */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41610000, - .cpu_name = "NP405L", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP4GS3 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40B10000, - .cpu_name = "NP4GS3", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP405H */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41410000, - .cpu_name = "NP405H", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405GPr */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x50910000, - .cpu_name = "405GPr", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* STBx25xx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x51510000, - .cpu_name = "STBx25xx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405LP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41F10000, - .cpu_name = "405LP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x51210000, - .cpu_name = "405EP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. A/B with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910007, - .cpu_name = "405EX Rev. A/B", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. C without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000d, - .cpu_name = "405EX Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. C with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000f, - .cpu_name = "405EX Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. D without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910003, - .cpu_name = "405EX Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. D with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910005, - .cpu_name = "405EX Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. A/B without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910001, - .cpu_name = "405EXr Rev. A/B", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. C without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910009, - .cpu_name = "405EXr Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. C with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000b, - .cpu_name = "405EXr Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. D without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910000, - .cpu_name = "405EXr Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. D with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910002, - .cpu_name = "405EXr Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { - /* 405EZ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41510000, - .cpu_name = "405EZ", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* APM8018X */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x7ff11432, - .cpu_name = "APM8018X", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic 40x PPC)", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - } -}; diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h index c370c1b804a9..98d4274a1b6b 100644 --- a/arch/powerpc/kernel/cpu_specs_book3s_64.h +++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h @@ -60,6 +60,9 @@ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ PPC_FEATURE2_VEC_CRYPTO) +#define COMMON_USER_POWER11 COMMON_USER_POWER10 +#define COMMON_USER2_POWER11 COMMON_USER2_POWER10 + static struct cpu_spec cpu_specs[] __initdata = { { /* PPC970 */ .pvr_mask = 0xffff0000, @@ -238,6 +241,21 @@ static struct cpu_spec cpu_specs[] __initdata = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* 2.07-compliant processor, HeXin C2000 processor */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00660000, + .cpu_name = "HX-C2000", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ .pvr_mask = 0xffffffff, .pvr_value = 0x0f000005, @@ -266,6 +284,20 @@ static struct cpu_spec cpu_specs[] __initdata = { .cpu_restore = __restore_cpu_power10, .platform = "power10", }, + { /* 3.1-compliant processor, i.e. Power11 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000007, + .cpu_name = "Power11 (architected)", + .cpu_features = CPU_FTRS_POWER11, + .cpu_user_features = COMMON_USER_POWER11, + .cpu_user_features2 = COMMON_USER2_POWER11, + .mmu_features = MMU_FTRS_POWER11, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power11", + }, { /* Power7 */ .pvr_mask = 0xffff0000, .pvr_value = 0x003f0000, @@ -436,6 +468,23 @@ static struct cpu_spec cpu_specs[] __initdata = { .machine_check_early = __machine_check_early_realmode_p10, .platform = "power10", }, + { /* Power11 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00820000, + .cpu_name = "Power11 (raw)", + .cpu_features = CPU_FTRS_POWER11, + .cpu_user_features = COMMON_USER_POWER11, + .cpu_user_features2 = COMMON_USER2_POWER11, + .mmu_features = MMU_FTRS_POWER11, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power11", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109f83..2ae8e9a7b461 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8a32bffefa5b..6f6801da9dc1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -20,9 +20,9 @@ #include <asm/setup.h> #include <asm/cpu_setup.h> -static struct cpu_spec the_cpu_spec __read_mostly; +static struct cpu_spec the_cpu_spec __ro_after_init; -struct cpu_spec* cur_cpu_spec __read_mostly = NULL; +struct cpu_spec *cur_cpu_spec __ro_after_init = NULL; EXPORT_SYMBOL(cur_cpu_spec); /* The platform string corresponding to the real PVR */ @@ -75,6 +75,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } + /* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */ + if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32)) + t->mmu_features |= MMU_FTR_KUAP; + *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; /* diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9a3b85bfc83f..103b6605dd68 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,12 +13,13 @@ #include <linux/io.h> #include <linux/memblock.h> #include <linux/of.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/kdump.h> #include <asm/firmware.h> #include <linux/uio.h> #include <asm/rtas.h> #include <asm/inst.h> +#include <asm/fadump.h> #ifdef DEBUG #include <asm/udbg.h> @@ -92,6 +93,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, return csize; } +/* + * Return true only when kexec based kernel dump capturing method is used. + * This ensures all restritions applied for kdump case are not automatically + * applied for fadump case. + */ +bool is_kdump_kernel(void) +{ + return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX; +} +EXPORT_SYMBOL_GPL(is_kdump_kernel); + #ifdef CONFIG_PPC_RTAS /* * The crashkernel region will almost always overlap the RTAS region, so diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f55c6fb34a3a..5712dd846263 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -27,7 +27,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) ppc_msgsync(); - if (should_hard_irq_enable()) + if (should_hard_irq_enable(regs)) do_hard_irq_enable(); kvmppc_clear_host_ipi(smp_processor_id()); diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c new file mode 100644 index 000000000000..3a0358e91c60 --- /dev/null +++ b/arch/powerpc/kernel/dexcr.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/capability.h> +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/sched.h> + +#include <asm/cpu_has_feature.h> +#include <asm/cputable.h> +#include <asm/processor.h> +#include <asm/reg.h> + +static int __init init_task_dexcr(void) +{ + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + + current->thread.dexcr_onexec = mfspr(SPRN_DEXCR); + + return 0; +} +early_initcall(init_task_dexcr) + +/* Allow thread local configuration of these by default */ +#define DEXCR_PRCTL_EDITABLE ( \ + DEXCR_PR_IBRTPD | \ + DEXCR_PR_SRAPD | \ + DEXCR_PR_NPHIE) + +static int prctl_to_aspect(unsigned long which, unsigned int *aspect) +{ + switch (which) { + case PR_PPC_DEXCR_SBHE: + *aspect = DEXCR_PR_SBHE; + break; + case PR_PPC_DEXCR_IBRTPD: + *aspect = DEXCR_PR_IBRTPD; + break; + case PR_PPC_DEXCR_SRAPD: + *aspect = DEXCR_PR_SRAPD; + break; + case PR_PPC_DEXCR_NPHIE: + *aspect = DEXCR_PR_NPHIE; + break; + default: + return -ENODEV; + } + + return 0; +} + +int get_dexcr_prctl(struct task_struct *task, unsigned long which) +{ + unsigned int aspect; + int ret; + + ret = prctl_to_aspect(which, &aspect); + if (ret) + return ret; + + if (aspect & DEXCR_PRCTL_EDITABLE) + ret |= PR_PPC_DEXCR_CTRL_EDITABLE; + + if (aspect & mfspr(SPRN_DEXCR)) + ret |= PR_PPC_DEXCR_CTRL_SET; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR; + + if (aspect & task->thread.dexcr_onexec) + ret |= PR_PPC_DEXCR_CTRL_SET_ONEXEC; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC; + + return ret; +} + +int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl) +{ + unsigned long dexcr; + unsigned int aspect; + int err = 0; + + err = prctl_to_aspect(which, &aspect); + if (err) + return err; + + if (!(aspect & DEXCR_PRCTL_EDITABLE)) + return -EPERM; + + if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET && ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC && ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + return -EINVAL; + + /* + * We do not want an unprivileged process being able to disable + * a setuid process's hash check instructions + */ + if (aspect == DEXCR_PR_NPHIE && + ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + dexcr = mfspr(SPRN_DEXCR); + + if (ctrl & PR_PPC_DEXCR_CTRL_SET) + dexcr |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + dexcr &= ~aspect; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC) + task->thread.dexcr_onexec |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + task->thread.dexcr_onexec &= ~aspect; + + mtspr(SPRN_DEXCR, dexcr); + + return 0; +} diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 038ce8d9061d..aa3689d61917 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -14,7 +14,7 @@ #define can_map_direct(dev, addr) \ ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr))) -bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr) +bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr) { if (likely(!dev->bus_dma_limit)) return false; @@ -24,7 +24,7 @@ bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr) #define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset) -bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle) +bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle) { if (likely(!dev->bus_dma_limit)) return false; @@ -93,28 +93,26 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, /* Creates TCEs for a user provided buffer. The user buffer must be * contiguous real kernel storage (not vmalloc). The address passed here - * comprises a page address and offset into that page. The dma_addr_t - * returned will point to the same byte within the page as was passed in. + * is a physical address to that page. The dma_addr_t returned will point + * to the same byte within the page as was passed in. */ -static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, +static dma_addr_t dma_iommu_map_phys(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction direction, unsigned long attrs) { - return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, - size, dma_get_mask(dev), direction, attrs); + return iommu_map_phys(dev, get_iommu_table_base(dev), phys, size, + dma_get_mask(dev), direction, attrs); } - -static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, +static void dma_iommu_unmap_phys(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, unsigned long attrs) { - iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, + iommu_unmap_phys(get_iommu_table_base(dev), dma_handle, size, direction, attrs); } - static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, unsigned long attrs) @@ -136,7 +134,7 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) struct pci_dev *pdev = to_pci_dev(dev); struct pci_controller *phb = pci_bus_to_host(pdev->bus); - if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported) + if (!phb->controller_ops.iommu_bypass_supported) return false; return phb->controller_ops.iommu_bypass_supported(pdev, mask); } @@ -144,7 +142,7 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) /* We support DMA to/from any memory page via the iommu */ int dma_iommu_dma_supported(struct device *dev, u64 mask) { - struct iommu_table *tbl = get_iommu_table_base(dev); + struct iommu_table *tbl; if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { /* @@ -162,6 +160,8 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) return 1; } + tbl = get_iommu_table_base(dev); + if (!tbl) { dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask); return 0; @@ -209,11 +209,11 @@ const struct dma_map_ops dma_iommu_ops = { .map_sg = dma_iommu_map_sg, .unmap_sg = dma_iommu_unmap_sg, .dma_supported = dma_iommu_dma_supported, - .map_page = dma_iommu_map_page, - .unmap_page = dma_iommu_unmap_page, + .map_phys = dma_iommu_map_phys, + .unmap_phys = dma_iommu_unmap_phys, .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c index ffbbbc432612..5b07ca7b73aa 100644 --- a/arch/powerpc/kernel/dma-mask.c +++ b/arch/powerpc/kernel/dma-mask.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/dma-mapping.h> +#include <linux/dma-map-ops.h> #include <linux/export.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index c3fb9fdf5bd7..3af6c06af02f 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -458,6 +458,14 @@ static int __init feat_enable_mce_power10(struct dt_cpu_feature *f) return 1; } +static int __init feat_enable_mce_power11(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power11"; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10; + + return 1; +} + static int __init feat_enable_tm(struct dt_cpu_feature *f) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -648,8 +656,10 @@ static struct dt_cpu_feature_match __initdata {"pc-relative-addressing", feat_enable, 0}, {"machine-check-power9", feat_enable_mce_power9, 0}, {"machine-check-power10", feat_enable_mce_power10, 0}, + {"machine-check-power11", feat_enable_mce_power11, 0}, {"performance-monitor-power9", feat_enable_pmu_power9, 0}, {"performance-monitor-power10", feat_enable_pmu_power10, 0}, + {"performance-monitor-power11", feat_enable_pmu_power10, 0}, {"event-based-branch-v3", feat_enable, 0}, {"random-number-generator", feat_enable, 0}, {"system-call-vectored", feat_disable, 0}, @@ -857,7 +867,7 @@ bool __init dt_cpu_ftrs_init(void *fdt) using_dt_cpu_ftrs = false; /* Setup and verify the FDT, if it fails we just bail */ - if (!early_init_dt_verify(fdt)) + if (!early_init_dt_verify(fdt, __pa(fdt))) return false; if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) @@ -1077,12 +1087,10 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char /* Count and allocate space for cpu features */ of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes, &nr_dt_cpu_features); - dt_cpu_features = memblock_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE); - if (!dt_cpu_features) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, - sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, - PAGE_SIZE); + dt_cpu_features = + memblock_alloc_or_panic( + sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, + PAGE_SIZE); cpufeatures_setup_start(isa); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ab316e155ea9..bb836f02101c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's state, EEH not support and Permanently unavailable * state, PE is in good state. + * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; @@ -1130,6 +1139,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe) return ret; } +EXPORT_SYMBOL_GPL(eeh_unfreeze_pe); static struct pci_device_id eeh_reset_ids[] = { @@ -1199,16 +1209,16 @@ int eeh_dev_open(struct pci_dev *pdev) struct eeh_dev *edev; int ret = -ENODEV; - mutex_lock(&eeh_dev_mutex); + guard(mutex)(&eeh_dev_mutex); /* No PCI device ? */ if (!pdev) - goto out; + return ret; /* No EEH device or PE ? */ edev = pci_dev_to_eeh_dev(pdev); if (!edev || !edev->pe) - goto out; + return ret; /* * The PE might have been put into frozen state, but we @@ -1218,16 +1228,12 @@ int eeh_dev_open(struct pci_dev *pdev) */ ret = eeh_pe_change_owner(edev->pe); if (ret) - goto out; + return ret; /* Increase PE's pass through count */ atomic_inc(&edev->pe->pass_dev_cnt); - mutex_unlock(&eeh_dev_mutex); return 0; -out: - mutex_unlock(&eeh_dev_mutex); - return ret; } EXPORT_SYMBOL_GPL(eeh_dev_open); @@ -1243,43 +1249,25 @@ void eeh_dev_release(struct pci_dev *pdev) { struct eeh_dev *edev; - mutex_lock(&eeh_dev_mutex); + guard(mutex)(&eeh_dev_mutex); /* No PCI device ? */ if (!pdev) - goto out; + return; /* No EEH device ? */ edev = pci_dev_to_eeh_dev(pdev); if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) - goto out; + return; /* Decrease PE's pass through count */ WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); eeh_pe_change_owner(edev->pe); -out: - mutex_unlock(&eeh_dev_mutex); } EXPORT_SYMBOL(eeh_dev_release); #ifdef CONFIG_IOMMU_API -static int dev_has_iommu_table(struct device *dev, void *data) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev **ppdev = data; - - if (!dev) - return 0; - - if (device_iommu_mapped(dev)) { - *ppdev = pdev; - return 1; - } - - return 0; -} - /** * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE * @group: IOMMU group @@ -1516,6 +1504,8 @@ int eeh_pe_configure(struct eeh_pe *pe) /* Invalid PE ? */ if (!pe) return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); return ret; } @@ -1544,10 +1534,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, if (!eeh_ops || !eeh_ops->err_inject) return -ENOENT; - /* Check on PCI error type */ - if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) - return -EINVAL; - /* Check on PCI error function */ if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) return -EINVAL; @@ -1585,6 +1571,104 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #endif /* CONFIG_PROC_FS */ +static int eeh_break_device(struct pci_dev *pdev) +{ + struct resource *bar = NULL; + void __iomem *mapped; + u16 old, bit; + int i, pos; + + /* Do we have an MMIO BAR to disable? */ + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + struct resource *r = &pdev->resource[i]; + + if (!r->flags || !r->start) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_UNSET) + continue; + + bar = r; + break; + } + + if (!bar) { + pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); + return -ENXIO; + } + + pci_err(pdev, "Going to break: %pR\n", bar); + + if (pdev->is_virtfn) { +#ifndef CONFIG_PCI_IOV + return -ENXIO; +#else + /* + * VFs don't have a per-function COMMAND register, so the best + * we can do is clear the Memory Space Enable bit in the PF's + * SRIOV control reg. + * + * Unfortunately, this requires that we have a PF (i.e doesn't + * work for a passed-through VF) and it has the potential side + * effect of also causing an EEH on every other VF under the + * PF. Oh well. + */ + pdev = pdev->physfn; + if (!pdev) + return -ENXIO; /* passed through VFs have no PF */ + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pos += PCI_SRIOV_CTRL; + bit = PCI_SRIOV_CTRL_MSE; +#endif /* !CONFIG_PCI_IOV */ + } else { + bit = PCI_COMMAND_MEMORY; + pos = PCI_COMMAND; + } + + /* + * Process here is: + * + * 1. Disable Memory space. + * + * 2. Perform an MMIO to the device. This should result in an error + * (CA / UR) being raised by the device which results in an EEH + * PE freeze. Using the in_8() accessor skips the eeh detection hook + * so the freeze hook so the EEH Detection machinery won't be + * triggered here. This is to match the usual behaviour of EEH + * where the HW will asynchronously freeze a PE and it's up to + * the kernel to notice and deal with it. + * + * 3. Turn Memory space back on. This is more important for VFs + * since recovery will probably fail if we don't. For normal + * the COMMAND register is reset as a part of re-initialising + * the device. + * + * Breaking stuff is the point so who cares if it's racy ;) + */ + pci_read_config_word(pdev, pos, &old); + + mapped = ioremap(bar->start, PAGE_SIZE); + if (!mapped) { + pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); + return -ENXIO; + } + + pci_write_config_word(pdev, pos, old & ~bit); + in_8(mapped); + pci_write_config_word(pdev, pos, old); + + iounmap(mapped); + + return 0; +} + +int eeh_pe_inject_mmio_error(struct pci_dev *pdev) +{ + return eeh_break_device(pdev); +} + #ifdef CONFIG_DEBUG_FS @@ -1689,7 +1773,6 @@ static ssize_t eeh_force_recover_write(struct file *filp, static const struct file_operations eeh_force_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_force_recover_write, }; @@ -1733,104 +1816,10 @@ static ssize_t eeh_dev_check_write(struct file *filp, static const struct file_operations eeh_dev_check_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_check_write, .read = eeh_debugfs_dev_usage, }; -static int eeh_debugfs_break_device(struct pci_dev *pdev) -{ - struct resource *bar = NULL; - void __iomem *mapped; - u16 old, bit; - int i, pos; - - /* Do we have an MMIO BAR to disable? */ - for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { - struct resource *r = &pdev->resource[i]; - - if (!r->flags || !r->start) - continue; - if (r->flags & IORESOURCE_IO) - continue; - if (r->flags & IORESOURCE_UNSET) - continue; - - bar = r; - break; - } - - if (!bar) { - pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); - return -ENXIO; - } - - pci_err(pdev, "Going to break: %pR\n", bar); - - if (pdev->is_virtfn) { -#ifndef CONFIG_PCI_IOV - return -ENXIO; -#else - /* - * VFs don't have a per-function COMMAND register, so the best - * we can do is clear the Memory Space Enable bit in the PF's - * SRIOV control reg. - * - * Unfortunately, this requires that we have a PF (i.e doesn't - * work for a passed-through VF) and it has the potential side - * effect of also causing an EEH on every other VF under the - * PF. Oh well. - */ - pdev = pdev->physfn; - if (!pdev) - return -ENXIO; /* passed through VFs have no PF */ - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - pos += PCI_SRIOV_CTRL; - bit = PCI_SRIOV_CTRL_MSE; -#endif /* !CONFIG_PCI_IOV */ - } else { - bit = PCI_COMMAND_MEMORY; - pos = PCI_COMMAND; - } - - /* - * Process here is: - * - * 1. Disable Memory space. - * - * 2. Perform an MMIO to the device. This should result in an error - * (CA / UR) being raised by the device which results in an EEH - * PE freeze. Using the in_8() accessor skips the eeh detection hook - * so the freeze hook so the EEH Detection machinery won't be - * triggered here. This is to match the usual behaviour of EEH - * where the HW will asynchronously freeze a PE and it's up to - * the kernel to notice and deal with it. - * - * 3. Turn Memory space back on. This is more important for VFs - * since recovery will probably fail if we don't. For normal - * the COMMAND register is reset as a part of re-initialising - * the device. - * - * Breaking stuff is the point so who cares if it's racy ;) - */ - pci_read_config_word(pdev, pos, &old); - - mapped = ioremap(bar->start, PAGE_SIZE); - if (!mapped) { - pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); - return -ENXIO; - } - - pci_write_config_word(pdev, pos, old & ~bit); - in_8(mapped); - pci_write_config_word(pdev, pos, old); - - iounmap(mapped); - - return 0; -} - static ssize_t eeh_dev_break_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) @@ -1842,7 +1831,7 @@ static ssize_t eeh_dev_break_write(struct file *filp, if (IS_ERR(pdev)) return PTR_ERR(pdev); - ret = eeh_debugfs_break_device(pdev); + ret = eeh_break_device(pdev); pci_dev_put(pdev); if (ret < 0) @@ -1853,7 +1842,6 @@ static ssize_t eeh_dev_break_write(struct file *filp, static const struct file_operations eeh_dev_break_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_break_write, .read = eeh_debugfs_dev_usage, }; @@ -1900,7 +1888,6 @@ static ssize_t eeh_dev_can_recover(struct file *filp, static const struct file_operations eeh_dev_can_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_can_recover, .read = eeh_debugfs_dev_usage, }; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index f279295179bd..ef78ff77cf8f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,7 +39,7 @@ static int eeh_result_priority(enum pci_ers_result result) case PCI_ERS_RESULT_NEED_RESET: return 6; default: - WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result); + WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", result); return 0; } }; @@ -60,7 +60,7 @@ static const char *pci_ers_result_name(enum pci_ers_result result) case PCI_ERS_RESULT_NO_AER_DRIVER: return "no AER driver"; default: - WARN_ONCE(1, "Unknown result type: %d\n", (int)result); + WARN_ONCE(1, "Unknown result type: %d\n", result); return "unknown"; } }; @@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, struct pci_driver *driver; enum pci_ers_result new_result; - pci_lock_rescan_remove(); pdev = edev->pdev; if (pdev) get_device(&pdev->dev); - pci_unlock_rescan_remove(); if (!pdev) { eeh_edev_info(edev, "no device"); + *result = PCI_ERS_RESULT_DISCONNECT; return; } device_lock(&pdev->dev); @@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root, struct eeh_dev *edev, *tmp; pr_info("EEH: Beginning: '%s'\n", name); - eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) - eeh_pe_report_edev(edev, fn, result); + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); if (result) pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", name, pci_ers_result_name(*result)); @@ -334,7 +334,7 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev, rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen); edev->in_error = true; - pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE); + pci_uevent_ers(pdev, rc); return rc; } @@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (!edev) return; + pci_lock_rescan_remove(); + /* * The content in the config space isn't saved because * the blocked config space on some adapters. We have @@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (list_is_last(&edev->entry, &edev->pe->edevs)) eeh_pe_restore_bars(edev->pe); + pci_unlock_rescan_remove(); return; } pdev = eeh_dev_to_pci_dev(edev); - if (!pdev) + if (!pdev) { + pci_unlock_rescan_remove(); return; + } pci_restore_state(pdev); + + pci_unlock_rescan_remove(); } /** @@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { - pci_lock_rescan_remove(); pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); } /* @@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (rc) return rc; - pci_lock_rescan_remove(); - /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { - pci_unlock_rescan_remove(); return rc; } @@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pe->tstamp = tstamp; pe->freeze_count = cnt; - pci_unlock_rescan_remove(); return 0; } @@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0}; int devices = 0; + pci_lock_rescan_remove(); + bus = eeh_pe_bus_get(pe); if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); + pci_unlock_rescan_remove(); return; } @@ -865,9 +869,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe) devices++; if (!devices) { - pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n", + pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n", pe->phb->global_number, pe->addr); - goto out; /* nothing to recover */ + /* + * The device is removed, tear down its state, on powernv + * hotplug driver would take care of it but not on pseries, + * permanently disable the card as it is hot removed. + * + * In the case of powernv, note that the removal of device + * is covered by pci rescan lock, so no problem even if hotplug + * driver attempts to remove the device. + */ + goto recover_failed; } /* Log the event */ @@ -898,7 +911,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* FIXME: Use the same format as dump_stack() */ pr_err("EEH: Call Trace:\n"); for (i = 0; i < pe->trace_entries; i++) - pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]); + pr_err("EEH: [%p] %pS\n", ptrs[i], ptrs[i]); pe->trace_entries = 0; } @@ -1065,10 +1078,10 @@ recover_failed: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_set_irq_state(pe, false); eeh_pe_report("error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); /* Mark the PE to be removed permanently */ eeh_pe_state_mark(pe, EEH_PE_REMOVED); @@ -1085,10 +1098,15 @@ recover_failed: eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); + bus = eeh_pe_bus_get(pe); + if (bus) + pci_hp_remove_devices(bus); + else + pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n", + __func__, pe->phb->global_number, pe->addr); + /* The passed PE should no longer be used */ + pci_unlock_rescan_remove(); return; } @@ -1105,6 +1123,8 @@ out: eeh_clear_slot_attention(edev->pdev); eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); + + pci_unlock_rescan_remove(); } /** @@ -1123,6 +1143,7 @@ void eeh_handle_special_event(void) unsigned long flags; int rc; + pci_lock_rescan_remove(); do { rc = eeh_ops->next_error(&pe); @@ -1162,10 +1183,12 @@ void eeh_handle_special_event(void) break; case EEH_NEXT_ERR_NONE: + pci_unlock_rescan_remove(); return; default: pr_warn("%s: Invalid value %d from next_error()\n", __func__, rc); + pci_unlock_rescan_remove(); return; } @@ -1177,7 +1200,9 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pci_unlock_rescan_remove(); eeh_handle_normal_event(pe); + pci_lock_rescan_remove(); } else { eeh_for_each_pe(pe, tmp_pe) eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) @@ -1185,12 +1210,11 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_report( "error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); - pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || @@ -1209,7 +1233,6 @@ void eeh_handle_special_event(void) } pci_hp_remove_devices(bus); } - pci_unlock_rescan_remove(); } /* @@ -1219,4 +1242,6 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_DEAD_IOC) break; } while (rc != EEH_NEXT_ERR_NONE); + + pci_unlock_rescan_remove(); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d2873d17d2b1..e740101fadf3 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -24,10 +24,10 @@ static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** - * eeh_set_pe_aux_size - Set PE auxillary data size - * @size: PE auxillary data size + * eeh_set_pe_aux_size - Set PE auxiliary data size + * @size: PE auxiliary data size in bytes * - * Set PE auxillary data size + * Set PE auxiliary data size. */ void eeh_set_pe_aux_size(int size) { @@ -527,7 +527,7 @@ EXPORT_SYMBOL_GPL(eeh_pe_state_mark); * eeh_pe_mark_isolated * @pe: EEH PE * - * Record that a PE has been isolated by marking the PE and it's children as + * Record that a PE has been isolated by marking the PE and its children as * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices * as pci_channel_io_frozen. */ @@ -671,11 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val); - if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { - eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val); - msleep(1000); - return; + if (edev->pdev) { + if (!edev->pdev->link_active_reporting) { + eeh_edev_dbg(edev, "No link reporting capability\n"); + msleep(1000); + return; + } } /* Wait the link is up until timeout (5s) */ @@ -850,6 +851,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -860,9 +862,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5604c9a1ac22..16f8ee6cb2cd 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -29,7 +29,6 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> #include <asm/kup.h> @@ -102,6 +101,17 @@ SYM_FUNC_END(__kuep_unlock) .endm #endif +.macro clr_ri trash +#ifndef CONFIG_BOOKE +#ifdef CONFIG_PPC_8xx + mtspr SPRN_NRI, \trash +#else + li \trash, MSR_KERNEL & ~MSR_RI + mtmsr \trash +#endif +#endif +.endm + .globl transfer_to_syscall transfer_to_syscall: stw r3, ORIG_GPR3(r1) @@ -109,7 +119,7 @@ transfer_to_syscall: stw r11, 0(r1) mflr r12 stw r12, _LINK(r1) -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #endif lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -138,8 +148,9 @@ ret_from_syscall: lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 - bne- 2f + bne- .L44x_icache_flush #endif /* CONFIG_PPC_47x */ +.L44x_icache_flush_return: kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -149,6 +160,7 @@ ret_from_syscall: cmpwi r3,0 REST_GPR(3, r1) syscall_exit_finish: + clr_ri r4 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 @@ -158,9 +170,6 @@ syscall_exit_finish: 1: REST_GPR(2, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif 3: mtcr r5 lwz r4,_CTR(r1) @@ -171,24 +180,25 @@ syscall_exit_finish: REST_GPR(0, r1) REST_GPRS(3, 12, r1) b 1b +_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x -2: li r7,0 +.L44x_icache_flush: + li r7,0 iccci r0,r0 stw r7,icache_44x_need_flush@l(r4) - b 1b + b .L44x_icache_flush_return #endif /* CONFIG_44x */ .globl ret_from_fork ret_from_fork: REST_NVGPRS(r1) bl schedule_tail - li r3,0 + li r3,0 /* fork() return value */ b ret_from_syscall - .globl ret_from_kernel_thread -ret_from_kernel_thread: - REST_NVGPRS(r1) + .globl ret_from_kernel_user_thread +ret_from_kernel_user_thread: bl schedule_tail mtctr r14 mr r3,r15 @@ -197,71 +207,29 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* - * This routine switches between two different tasks. The process - * state of one is saved on its kernel stack. Then the state - * of the other is restored from its kernel stack. The memory - * management hardware is updated to the second process's state. - * Finally, we can return to the second process. - * On entry, r3 points to the THREAD for the current task, r4 - * points to the THREAD for the new task. - * - * This routine is always called with interrupts disabled. - * - * Note: there are two ways to get to the "going out" portion - * of this code; either by coming in via the entry (_switch) - * or via "fork" which must set up an environment equivalent - * to the "_switch" path. If you change this , you'll have to - * change the fork code also. - * - * The code which creates the new task context is in 'copy_thread' - * in arch/ppc/kernel/process.c - */ -_GLOBAL(_switch) - stwu r1,-SWITCH_FRAME_SIZE(r1) - mflr r0 - stw r0,SWITCH_FRAME_SIZE+4(r1) - /* r3-r12 are caller saved -- Cort */ - SAVE_NVGPRS(r1) - stw r0,_NIP(r1) /* Return to switch caller */ - mfcr r10 - stw r10,_CCR(r1) - stw r1,KSP(r3) /* Set old stack pointer */ - -#ifdef CONFIG_SMP - /* We need a sync somewhere here to make sure that if the - * previous task gets rescheduled on another CPU, it sees all - * stores it has performed on this one. + .globl start_kernel_thread +start_kernel_thread: + bl schedule_tail + mtctr r14 + mr r3,r15 + PPC440EP_ERR42 + bctrl + /* + * This must not return. We actually want to BUG here, not WARN, + * because BUG will exit the process which is what the kernel thread + * should have done, which may give some hope of continuing. */ - sync -#endif /* CONFIG_SMP */ - - tophys(r0,r4) - mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ - lwz r1,KSP(r4) /* Load new stack pointer */ - - /* save the old current 'last' for return value */ - mr r3,r2 - addi r2,r4,-THREAD /* Update current */ - - lwz r0,_CCR(r1) - mtcrf 0xFF,r0 - /* r3-r12 are destroyed -- Cort */ - REST_NVGPRS(r1) - - lwz r4,_NIP(r1) /* Return to _switch caller in new task */ - mtlr r4 - addi r1,r1,SWITCH_FRAME_SIZE - blr +100: trap + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 .globl fast_exception_return fast_exception_return: -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +#ifndef CONFIG_BOOKE andi. r10,r9,MSR_RI /* check for recoverable interrupt */ beq 3f /* if not, we've got problems */ #endif -2: lwz r10,_CCR(r11) + lwz r10,_CCR(r11) REST_GPRS(1, 6, r11) mtcr r10 lwz r10,_LINK(r11) @@ -269,19 +237,14 @@ fast_exception_return: /* Clear the exception marker on the stack to avoid confusing stacktrace */ li r10, 0 stw r10, 8(r11) - REST_GPR(10, r11) -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif + clr_ri r10 mtspr SPRN_SRR1,r9 mtspr SPRN_SRR0,r12 REST_GPR(9, r11) + REST_GPR(10, r11) REST_GPR(12, r11) REST_GPR(11, r11) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) /* aargh, a nonrecoverable interrupt, panic */ @@ -307,14 +270,14 @@ interrupt_return: .Lfast_user_interrupt_return: lwz r11,_NIP(r1) lwz r12,_MSR(r1) + clr_ri r4 mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 BEGIN_FTR_SECTION + lwarx r0,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - lwarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) lwz r3,_CCR(r1) lwz r4,_LINK(r1) @@ -338,9 +301,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(0, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif .Lrestore_nvgprs: REST_NVGPRS(r1) @@ -353,14 +313,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) cmpwi cr1,r3,0 lwz r11,_NIP(r1) lwz r12,_MSR(r1) + clr_ri r4 mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 BEGIN_FTR_SECTION + lwarx r0,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - lwarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) lwz r3,_LINK(r1) lwz r4,_CTR(r1) @@ -388,9 +348,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(0, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif 1: /* * Emulate stack store with update. New r1 value was already calculated @@ -417,12 +374,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mfspr r9, SPRN_SPRG_SCRATCH0 #endif rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif _ASM_NOKPROBE_SYMBOL(interrupt_return) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE /* * Returning from a critical interrupt in user mode doesn't need @@ -437,17 +391,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) * time of the critical interrupt. * */ -#ifdef CONFIG_40x -#define PPC_40x_TURN_OFF_MSR_DR \ - /* avoid any possible TLB misses here by turning off MSR.DR, we \ - * assume the instructions here are mapped by a pinned TLB entry */ \ - li r10,MSR_IR; \ - mtmsr r10; \ - isync; \ - tophys(r1, r1); -#else -#define PPC_40x_TURN_OFF_MSR_DR -#endif #define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \ REST_NVGPRS(r1); \ @@ -465,7 +408,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) mtlr r11; \ lwz r10,_CCR(r1); \ mtcrf 0xff,r10; \ - PPC_40x_TURN_OFF_MSR_DR; \ lwz r9,_DEAR(r1); \ lwz r10,_ESR(r1); \ mtspr SPRN_DEAR,r9; \ @@ -513,20 +455,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) #define RESTORE_MMU_REGS #endif -#ifdef CONFIG_40x - .globl ret_from_crit_exc -ret_from_crit_exc: - lis r9,crit_srr0@ha; - lwz r9,crit_srr0@l(r9); - lis r10,crit_srr1@ha; - lwz r10,crit_srr1@l(r10); - mtspr SPRN_SRR0,r9; - mtspr SPRN_SRR1,r10; - RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) -_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) -#endif /* CONFIG_40x */ - -#ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: RESTORE_xSRR(SRR0,SRR1); @@ -551,4 +479,3 @@ ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ -#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 69a912550577..6a414ed5a411 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -3,6 +3,7 @@ * Copyright (C) 2012 Freescale Semiconductor, Inc. */ +#include <linux/export.h> #include <linux/threads.h> #include <asm/epapr_hcalls.h> #include <asm/reg.h> @@ -12,7 +13,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ @@ -21,7 +21,13 @@ _GLOBAL(epapr_ev_idle) ori r4, r4,_TLF_NAPPING /* so when we take an exception */ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */ +#ifdef CONFIG_BOOKE wrteei 1 +#else + mfmsr r4 + ori r4, r4, MSR_EE + mtmsr r4 +#endif idle_loop: LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d4b8aff20815..247ab2acaccc 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -9,7 +9,7 @@ #include <linux/of_fdt.h> #include <asm/epapr_hcalls.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/machdep.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 3f86091e68b3..63f6b9f513a4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -5,6 +5,7 @@ * Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. */ +#include <linux/linkage.h> #include <linux/threads.h> #include <asm/reg.h> #include <asm/page.h> @@ -13,7 +14,6 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> -#include <asm/reg_a2.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> @@ -66,7 +66,7 @@ #define SPECIAL_EXC_LOAD(reg, name) \ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) -special_reg_save: +SYM_CODE_START_LOCAL(special_reg_save) /* * We only need (or have stack space) to save this stuff if * we interrupted the kernel. @@ -131,8 +131,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) SPECIAL_EXC_STORE(r10,CSRR1) blr +SYM_CODE_END(special_reg_save) -ret_from_level_except: +SYM_CODE_START_LOCAL(ret_from_level_except) ld r3,_MSR(r1) andi. r3,r3,MSR_PR beq 1f @@ -206,6 +207,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mtxer r11 blr +SYM_CODE_END(ret_from_level_except) .macro ret_from_level srr0 srr1 paca_ex scratch bl ret_from_level_except @@ -232,13 +234,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r13,\scratch .endm -ret_from_crit_except: +SYM_CODE_START_LOCAL(ret_from_crit_except) ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH rfci +SYM_CODE_END(ret_from_crit_except) -ret_from_mc_except: +SYM_CODE_START_LOCAL(ret_from_mc_except) ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH rfmci +SYM_CODE_END(ret_from_mc_except) /* Exception prolog code for all exceptions */ #define EXCEPTION_PROLOG(n, intnum, type, addition) \ @@ -481,8 +485,8 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */ EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ - EXCEPTION_STUB(0x1c0, data_tlb_miss) - EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x1c0, data_tlb_miss_bolted) + EXCEPTION_STUB(0x1e0, instruction_tlb_miss_bolted) EXCEPTION_STUB(0x200, altivec_unavailable) EXCEPTION_STUB(0x220, altivec_assist) EXCEPTION_STUB(0x260, perfmon) @@ -978,20 +982,22 @@ masked_interrupt_book3e_0x2c0: * r14 and r15 containing the fault address and error code, with the * original values stashed away in the PACA */ -storage_fault_common: +SYM_CODE_START_LOCAL(storage_fault_common) addi r3,r1,STACK_INT_FRAME_REGS bl do_page_fault b interrupt_return +SYM_CODE_END(storage_fault_common) /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it * continues here. */ -alignment_more: +SYM_CODE_START_LOCAL(alignment_more) addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) b interrupt_return +SYM_CODE_END(alignment_more) /* * Trampolines used when spotting a bad kernel stack pointer in @@ -1030,8 +1036,7 @@ BAD_STACK_TRAMPOLINE(0xe00) BAD_STACK_TRAMPOLINE(0xf00) BAD_STACK_TRAMPOLINE(0xf20) - .globl bad_stack_book3e -bad_stack_book3e: +_GLOBAL(bad_stack_book3e) /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */ ld r1,PACAEMERGSP(r13) @@ -1285,8 +1290,7 @@ have_hes: * ever takes any parameters, the SCOM code must also be updated to * provide them. */ - .globl a2_tlbinit_code_start -a2_tlbinit_code_start: +_GLOBAL(a2_tlbinit_code_start) ori r11,r3,MAS0_WQ_ALLWAYS oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ @@ -1479,8 +1483,7 @@ _GLOBAL(book3e_secondary_thread_init) mflr r28 b 3b - .globl init_core_book3e -init_core_book3e: +_GLOBAL(init_core_book3e) /* Establish the interrupt vector base */ tovirt(r2,r2) LOAD_REG_ADDR(r3, interrupt_base_book3e) @@ -1488,7 +1491,7 @@ init_core_book3e: sync blr -init_thread_book3e: +SYM_CODE_START_LOCAL(init_thread_book3e) lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h mtspr SPRN_EPCR,r3 @@ -1502,6 +1505,7 @@ init_thread_book3e: mtspr SPRN_TSR,r3 blr +SYM_CODE_END(init_thread_book3e) _GLOBAL(__setup_base_ivors) SET_IVOR(0, 0x020) /* Critical Input */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6441a1ba57ac..b7229430ca94 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -893,7 +893,7 @@ __start_interrupts: * * Call convention: * - * syscall register convention is in Documentation/powerpc/syscall64-abi.rst + * syscall register convention is in Documentation/arch/powerpc/syscall64-abi.rst */ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) /* SCV 0 */ @@ -1075,7 +1075,7 @@ EXC_COMMON_BEGIN(system_reset_common) __GEN_COMMON_BODY system_reset addi r3,r1,STACK_INT_FRAME_REGS - bl system_reset_exception + bl CFUNC(system_reset_exception) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 @@ -1223,9 +1223,9 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) addi r3,r1,STACK_INT_FRAME_REGS BEGIN_FTR_SECTION - bl machine_check_early_boot + bl CFUNC(machine_check_early_boot) END_FTR_SECTION(0, 1) // nop out after boot - bl machine_check_early + bl CFUNC(machine_check_early) std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) @@ -1286,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) * Queue up the MCE event so that we can log it later, while * returning from kernel or opal call. */ - bl machine_check_queue_event + bl CFUNC(machine_check_queue_event) MACHINE_CHECK_HANDLER_WINDUP RFI_TO_KERNEL @@ -1312,7 +1312,7 @@ EXC_COMMON_BEGIN(machine_check_common) */ GEN_COMMON machine_check addi r3,r1,STACK_INT_FRAME_REGS - bl machine_check_exception_async + bl CFUNC(machine_check_exception_async) b interrupt_return_srr @@ -1322,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common) * done. Queue the event then call the idle code to do the wake up. */ EXC_COMMON_BEGIN(machine_check_idle_common) - bl machine_check_queue_event + bl CFUNC(machine_check_queue_event) /* * GPR-loss wakeups are relatively straightforward, because the @@ -1361,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce) BEGIN_FTR_SECTION li r10,0 /* clear MSR_RI */ mtmsrd r10,1 - bl disable_machine_check + bl CFUNC(disable_machine_check) END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ld r10,PACAKMSR(r13) li r3,MSR_ME @@ -1378,14 +1378,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * the early handler which is a true NMI. */ addi r3,r1,STACK_INT_FRAME_REGS - bl machine_check_exception + bl CFUNC(machine_check_exception) /* * We will not reach here. Even if we did, there is no way out. * Call unrecoverable_exception and die. */ addi r3,r1,STACK_INT_FRAME_REGS - bl unrecoverable_exception + bl CFUNC(unrecoverable_exception) b . @@ -1440,16 +1440,16 @@ EXC_COMMON_BEGIN(data_access_common) bne- 1f #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION - bl do_hash_fault + bl CFUNC(do_hash_fault) MMU_FTR_SECTION_ELSE - bl do_page_fault + bl CFUNC(do_page_fault) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #else - bl do_page_fault + bl CFUNC(do_page_fault) #endif b interrupt_return_srr -1: bl do_break +1: bl CFUNC(do_break) /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. @@ -1493,7 +1493,7 @@ EXC_COMMON_BEGIN(data_access_slb_common) BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_INT_FRAME_REGS - bl do_slb_fault + bl CFUNC(do_slb_fault) cmpdi r3,0 bne- 1f b fast_interrupt_return_srr @@ -1507,7 +1507,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #endif std r3,RESULT(r1) addi r3,r1,STACK_INT_FRAME_REGS - bl do_bad_segment_interrupt + bl CFUNC(do_bad_segment_interrupt) b interrupt_return_srr @@ -1541,12 +1541,12 @@ EXC_COMMON_BEGIN(instruction_access_common) addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION - bl do_hash_fault + bl CFUNC(do_hash_fault) MMU_FTR_SECTION_ELSE - bl do_page_fault + bl CFUNC(do_page_fault) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #else - bl do_page_fault + bl CFUNC(do_page_fault) #endif b interrupt_return_srr @@ -1581,7 +1581,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_INT_FRAME_REGS - bl do_slb_fault + bl CFUNC(do_slb_fault) cmpdi r3,0 bne- 1f b fast_interrupt_return_srr @@ -1595,7 +1595,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #endif std r3,RESULT(r1) addi r3,r1,STACK_INT_FRAME_REGS - bl do_bad_segment_interrupt + bl CFUNC(do_bad_segment_interrupt) b interrupt_return_srr @@ -1649,7 +1649,7 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt addi r3,r1,STACK_INT_FRAME_REGS - bl do_IRQ + bl CFUNC(do_IRQ) BEGIN_FTR_SECTION b interrupt_return_hsrr FTR_SECTION_ELSE @@ -1679,7 +1679,7 @@ EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment addi r3,r1,STACK_INT_FRAME_REGS - bl alignment_exception + bl CFUNC(alignment_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1745,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common) .Ldo_program_check: addi r3,r1,STACK_INT_FRAME_REGS - bl program_check_exception + bl CFUNC(program_check_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1777,7 +1777,7 @@ EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ addi r3,r1,STACK_INT_FRAME_REGS - bl kernel_fp_unavailable_exception + bl CFUNC(kernel_fp_unavailable_exception) 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 1: @@ -1790,12 +1790,12 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif - bl load_up_fpu + bl CFUNC(load_up_fpu) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_INT_FRAME_REGS - bl fp_unavailable_tm + bl CFUNC(fp_unavailable_tm) b interrupt_return_srr #endif @@ -1839,7 +1839,7 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer addi r3,r1,STACK_INT_FRAME_REGS - bl timer_interrupt + bl CFUNC(timer_interrupt) b interrupt_return_srr @@ -1925,9 +1925,9 @@ EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL - bl doorbell_exception + bl CFUNC(doorbell_exception) #else - bl unknown_async_exception + bl CFUNC(unknown_async_exception) #endif b interrupt_return_srr @@ -1952,8 +1952,8 @@ EXC_VIRT_NONE(0x4b00, 0x100) * Call convention: * * syscall and hypercalls register conventions are documented in - * Documentation/powerpc/syscall64-abi.rst and - * Documentation/powerpc/papr_hcalls.rst respectively. + * Documentation/arch/powerpc/syscall64-abi.rst and + * Documentation/arch/powerpc/papr_hcalls.rst respectively. * * The intersection of volatile registers that don't contain possible * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry @@ -1989,13 +1989,6 @@ INT_DEFINE_END(system_call) INTERRUPT_TO_KERNEL #endif -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH -BEGIN_FTR_SECTION - cmpdi r0,0x1ebe - beq- 1f -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) -#endif - /* We reach here with PACA in r13, r13 in r9. */ mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 @@ -2015,16 +2008,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b system_call_common #endif .endif - -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH - /* Fast LE/BE switch system call */ -1: mfspr r12,SPRN_SRR1 - xori r12,r12,MSR_LE - mtspr SPRN_SRR1,r12 - mr r13,r9 - RFI_TO_USER /* return to userspace */ - b . /* prevent speculative execution */ -#endif .endm EXC_REAL_BEGIN(system_call, 0xc00, 0x100) @@ -2091,7 +2074,7 @@ EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step addi r3,r1,STACK_INT_FRAME_REGS - bl single_step_exception + bl CFUNC(single_step_exception) b interrupt_return_srr @@ -2126,9 +2109,9 @@ EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage addi r3,r1,STACK_INT_FRAME_REGS BEGIN_MMU_FTR_SECTION - bl do_bad_page_fault_segv + bl CFUNC(do_bad_page_fault_segv) MMU_FTR_SECTION_ELSE - bl unknown_exception + bl CFUNC(unknown_exception) ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b interrupt_return_hsrr @@ -2154,7 +2137,7 @@ EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage addi r3,r1,STACK_INT_FRAME_REGS - bl unknown_exception + bl CFUNC(unknown_exception) b interrupt_return_hsrr @@ -2177,7 +2160,7 @@ EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist addi r3,r1,STACK_INT_FRAME_REGS - bl emulation_assist_interrupt + bl CFUNC(emulation_assist_interrupt) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2237,7 +2220,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) __GEN_COMMON_BODY hmi_exception_early addi r3,r1,STACK_INT_FRAME_REGS - bl hmi_exception_realmode + bl CFUNC(hmi_exception_realmode) cmpdi cr0,r3,0 bne 1f @@ -2255,7 +2238,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception addi r3,r1,STACK_INT_FRAME_REGS - bl handle_hmi_exception + bl CFUNC(handle_hmi_exception) b interrupt_return_hsrr @@ -2290,9 +2273,9 @@ EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL - bl doorbell_exception + bl CFUNC(doorbell_exception) #else - bl unknown_async_exception + bl CFUNC(unknown_async_exception) #endif b interrupt_return_hsrr @@ -2325,7 +2308,7 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq addi r3,r1,STACK_INT_FRAME_REGS - bl do_IRQ + bl CFUNC(do_IRQ) b interrupt_return_hsrr @@ -2374,10 +2357,10 @@ EXC_COMMON_BEGIN(performance_monitor_common) lbz r4,PACAIRQSOFTMASK(r13) cmpdi r4,IRQS_ENABLED bne 1f - bl performance_monitor_exception_async + bl CFUNC(performance_monitor_exception_async) b interrupt_return_srr 1: - bl performance_monitor_exception_nmi + bl CFUNC(performance_monitor_exception_nmi) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 mtmsrd r9,1 @@ -2421,19 +2404,19 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif - bl load_up_altivec + bl CFUNC(load_up_altivec) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_INT_FRAME_REGS - bl altivec_unavailable_tm + bl CFUNC(altivec_unavailable_tm) b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif addi r3,r1,STACK_INT_FRAME_REGS - bl altivec_unavailable_exception + bl CFUNC(altivec_unavailable_exception) b interrupt_return_srr @@ -2475,14 +2458,14 @@ BEGIN_FTR_SECTION #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_INT_FRAME_REGS - bl vsx_unavailable_tm + bl CFUNC(vsx_unavailable_tm) b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif addi r3,r1,STACK_INT_FRAME_REGS - bl vsx_unavailable_exception + bl CFUNC(vsx_unavailable_exception) b interrupt_return_srr @@ -2509,7 +2492,7 @@ EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS - bl facility_unavailable_exception + bl CFUNC(facility_unavailable_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2537,7 +2520,7 @@ EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS - bl facility_unavailable_exception + bl CFUNC(facility_unavailable_exception) /* XXX Shouldn't be necessary in practice */ HANDLER_RESTORE_NVGPRS() b interrupt_return_hsrr @@ -2554,27 +2537,8 @@ EXC_REAL_NONE(0x1000, 0x100) EXC_VIRT_NONE(0x5000, 0x100) EXC_REAL_NONE(0x1100, 0x100) EXC_VIRT_NONE(0x5100, 0x100) - -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_system_error) - IVEC=0x1200 - IHSRR=1 -INT_DEFINE_END(cbe_system_error) - -EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) - GEN_INT_ENTRY cbe_system_error, virt=0 -EXC_REAL_END(cbe_system_error, 0x1200, 0x100) -EXC_VIRT_NONE(0x5200, 0x100) -EXC_COMMON_BEGIN(cbe_system_error_common) - GEN_COMMON cbe_system_error - addi r3,r1,STACK_INT_FRAME_REGS - bl cbe_system_error_exception - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -#endif /** * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt. @@ -2599,7 +2563,7 @@ EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint addi r3,r1,STACK_INT_FRAME_REGS - bl instruction_breakpoint_exception + bl CFUNC(instruction_breakpoint_exception) b interrupt_return_srr @@ -2721,30 +2685,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception addi r3,r1,STACK_INT_FRAME_REGS - bl unknown_exception + bl CFUNC(unknown_exception) b interrupt_return_hsrr -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_maintenance) - IVEC=0x1600 - IHSRR=1 -INT_DEFINE_END(cbe_maintenance) - -EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) - GEN_INT_ENTRY cbe_maintenance, virt=0 -EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) -EXC_VIRT_NONE(0x5600, 0x100) -EXC_COMMON_BEGIN(cbe_maintenance_common) - GEN_COMMON cbe_maintenance - addi r3,r1,STACK_INT_FRAME_REGS - bl cbe_maintenance_exception - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -#endif INT_DEFINE_BEGIN(altivec_assist) @@ -2764,34 +2710,16 @@ EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC - bl altivec_assist_exception + bl CFUNC(altivec_assist_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ #else - bl unknown_exception + bl CFUNC(unknown_exception) #endif b interrupt_return_srr -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_thermal) - IVEC=0x1800 - IHSRR=1 -INT_DEFINE_END(cbe_thermal) - -EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) - GEN_INT_ENTRY cbe_thermal, virt=0 -EXC_REAL_END(cbe_thermal, 0x1800, 0x100) -EXC_VIRT_NONE(0x5800, 0x100) -EXC_COMMON_BEGIN(cbe_thermal_common) - GEN_COMMON cbe_thermal - addi r3,r1,STACK_INT_FRAME_REGS - bl cbe_thermal_exception - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -#endif #ifdef CONFIG_PPC_WATCHDOG @@ -2818,7 +2746,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) __GEN_COMMON_BODY soft_nmi addi r3,r1,STACK_INT_FRAME_REGS - bl soft_nmi_interrupt + bl CFUNC(soft_nmi_interrupt) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ea0a073abd96..4ebc333dd786 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -33,6 +33,7 @@ #include <asm/fadump-internal.h> #include <asm/setup.h> #include <asm/interrupt.h> +#include <asm/prom.h> /* * The CPU who acquired the lock to trigger the fadump crash should @@ -53,8 +54,6 @@ static struct kobject *fadump_kobj; static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; - #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) @@ -80,26 +79,38 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. */ -static int __init fadump_cma_init(void) +void __init fadump_cma_init(void) { - unsigned long long base, size; + unsigned long long base, size, end; int rc; - if (!fw_dump.fadump_enabled) - return 0; - + if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled || + fw_dump.dump_active) + return; /* * Do not use CMA if user has provided fadump=nocma kernel parameter. - * Return 1 to continue with fadump old behaviour. */ - if (fw_dump.nocma) - return 1; + if (fw_dump.nocma || !fw_dump.boot_memory_size) + return; + /* + * [base, end) should be reserved during early init in + * fadump_reserve_mem(). No need to check this here as + * cma_init_reserved_mem() already checks for overlap. + * Here we give the aligned chunk of this reserved memory to CMA. + */ base = fw_dump.reserve_dump_area_start; size = fw_dump.boot_memory_size; + end = base + size; - if (!size) - return 0; + base = ALIGN(base, CMA_MIN_ALIGNMENT_BYTES); + end = ALIGN_DOWN(end, CMA_MIN_ALIGNMENT_BYTES); + size = end - base; + + if (end <= base) { + pr_warn("%s: Too less memory to give to CMA\n", __func__); + return; + } rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); if (rc) { @@ -110,7 +121,7 @@ static int __init fadump_cma_init(void) * blocked from production system usage. Hence return 1, * so that we can continue with fadump. */ - return 1; + return; } /* @@ -122,17 +133,50 @@ static int __init fadump_cma_init(void) /* * So we now have successfully initialized cma area for fadump. */ - pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " + pr_info("Initialized [0x%llx, %luMB] cma area from [0x%lx, %luMB] " "bytes of memory reserved for firmware-assisted dump\n", - cma_get_size(fadump_cma), - (unsigned long)cma_get_base(fadump_cma) >> 20, - fw_dump.reserve_dump_area_size); - return 1; + cma_get_base(fadump_cma), cma_get_size(fadump_cma) >> 20, + fw_dump.reserve_dump_area_start, + fw_dump.boot_memory_size >> 20); + return; } -#else -static int __init fadump_cma_init(void) { return 1; } #endif /* CONFIG_CMA */ +/* + * Additional parameters meant for capture kernel are placed in a dedicated area. + * If this is capture kernel boot, append these parameters to bootargs. + */ +void __init fadump_append_bootargs(void) +{ + char *append_args; + size_t len; + + if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) + return; + + if (fw_dump.param_area < fw_dump.boot_mem_top) { + if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { + pr_warn("WARNING: Can't use additional parameters area!\n"); + fw_dump.param_area = 0; + return; + } + } + + append_args = (char *)fw_dump.param_area; + len = strlen(boot_command_line); + + /* + * Too late to fail even if cmdline size exceeds. Truncate additional parameters + * to cmdline size and proceed anyway. + */ + if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1) + pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n"); + + pr_debug("Cmdline: %s\n", boot_command_line); + snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args); + pr_info("Updated cmdline: %s\n", boot_command_line); +} + /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) @@ -223,28 +267,6 @@ static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) } /* - * Returns true, if there are no holes in boot memory area, - * false otherwise. - */ -bool is_fadump_boot_mem_contiguous(void) -{ - unsigned long d_start, d_end; - bool ret = false; - int i; - - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - d_start = fw_dump.boot_mem_addr[i]; - d_end = d_start + fw_dump.boot_mem_sz[i]; - - ret = is_fadump_mem_area_contiguous(d_start, d_end); - if (!ret) - break; - } - - return ret; -} - -/* * Returns true, if there are no holes in reserved memory area, * false otherwise. */ @@ -268,10 +290,8 @@ static void __init fadump_show_config(void) if (!fw_dump.fadump_supported) return; - pr_debug("Fadump enabled : %s\n", - (fw_dump.fadump_enabled ? "yes" : "no")); - pr_debug("Dump Active : %s\n", - (fw_dump.dump_active ? "yes" : "no")); + pr_debug("Fadump enabled : %s\n", str_yes_no(fw_dump.fadump_enabled)); + pr_debug("Dump Active : %s\n", str_yes_no(fw_dump.dump_active)); pr_debug("Dump section sizes:\n"); pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); @@ -313,7 +333,7 @@ static __init u64 fadump_calculate_reserve_size(void) * memory at a predefined offset. */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &size, &base); + &size, &base, NULL, NULL, NULL); if (ret == 0 && size > 0) { unsigned long max_size; @@ -373,12 +393,6 @@ static unsigned long __init get_fadump_area_size(void) size = PAGE_ALIGN(size); size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); - size += sizeof(struct elfhdr); /* ELF core header.*/ - size += sizeof(struct elf_phdr); /* place holder for cpu notes */ - /* Program headers for crash memory regions. */ - size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); - - size = PAGE_ALIGN(size); /* This is to hold kernel metadata on platforms that support it */ size += (fw_dump.ops->fadump_get_metadata_size ? @@ -389,10 +403,11 @@ static unsigned long __init get_fadump_area_size(void) static int __init add_boot_mem_region(unsigned long rstart, unsigned long rsize) { + int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); int i = fw_dump.boot_mem_regs_cnt++; - if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) { - fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS; + if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { + fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; return 0; } @@ -552,13 +567,6 @@ int __init fadump_reserve_mem(void) if (!fw_dump.dump_active) { fw_dump.boot_memory_size = PAGE_ALIGN(fadump_calculate_reserve_size()); -#ifdef CONFIG_CMA - if (!fw_dump.nocma) { - fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, - CMA_MIN_ALIGNMENT_BYTES); - } -#endif bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); if (fw_dump.boot_memory_size < bootmem_min) { @@ -573,22 +581,6 @@ int __init fadump_reserve_mem(void) } } - /* - * Calculate the memory boundary. - * If memory_limit is less than actual memory boundary then reserve - * the memory for fadump beyond the memory_limit and adjust the - * memory_limit accordingly, so that the running kernel can run with - * specified memory_limit. - */ - if (memory_limit && memory_limit < memblock_end_of_DRAM()) { - size = get_fadump_area_size(); - if ((memory_limit + size) < memblock_end_of_DRAM()) - memory_limit += size; - else - memory_limit = memblock_end_of_DRAM(); - printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", memory_limit); - } if (memory_limit) mem_boundary = memory_limit; else @@ -647,13 +639,12 @@ int __init fadump_reserve_mem(void) pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", (size >> 20), base, (memblock_phys_mem_size() >> 20)); - - ret = fadump_cma_init(); } return ret; error_out: fw_dump.fadump_enabled = 0; + fw_dump.reserve_dump_area_size = 0; return 0; } @@ -704,7 +695,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * old_cpu == -1 means this is the first CPU which has come here, * go ahead and trigger fadump. * - * old_cpu != -1 means some other CPU has already on it's way + * old_cpu != -1 means some other CPU has already on its way * to trigger fadump, just keep looping here. */ this_cpu = smp_processor_id(); @@ -759,7 +750,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) * prstatus.pr_pid = ???? */ elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, + buf = append_elf_note(buf, NN_PRSTATUS, NT_PRSTATUS, &prstatus, sizeof(prstatus)); return buf; } @@ -930,36 +921,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, return 0; } -static int fadump_exclude_reserved_area(u64 start, u64 end) -{ - u64 ra_start, ra_end; - int ret = 0; - - ra_start = fw_dump.reserve_dump_area_start; - ra_end = ra_start + fw_dump.reserve_dump_area_size; - - if ((ra_start < end) && (ra_end > start)) { - if ((start < ra_start) && (end > ra_end)) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - if (ret) - return ret; - - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } else if (start < ra_start) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - } else if (ra_end < end) { - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } - } else - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - - return ret; -} - static int fadump_init_elfcore_header(char *bufp) { struct elfhdr *elf; @@ -997,52 +958,6 @@ static int fadump_init_elfcore_header(char *bufp) } /* - * Traverse through memblock structure and setup crash memory ranges. These - * ranges will be used create PT_LOAD program headers in elfcore header. - */ -static int fadump_setup_crash_memory_ranges(void) -{ - u64 i, start, end; - int ret; - - pr_debug("Setup crash memory ranges.\n"); - crash_mrange_info.mem_range_cnt = 0; - - /* - * Boot memory region(s) registered with firmware are moved to - * different location at the time of crash. Create separate program - * header(s) for this memory chunk(s) with the correct offset. - */ - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - start = fw_dump.boot_mem_addr[i]; - end = start + fw_dump.boot_mem_sz[i]; - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - if (ret) - return ret; - } - - for_each_mem_range(i, &start, &end) { - /* - * skip the memory chunk that is already added - * (0 through boot_memory_top). - */ - if (start < fw_dump.boot_mem_top) { - if (end > fw_dump.boot_mem_top) - start = fw_dump.boot_mem_top; - else - continue; - } - - /* add this range excluding the reserved dump area. */ - ret = fadump_exclude_reserved_area(start, end); - if (ret) - return ret; - } - - return 0; -} - -/* * If the given physical address falls within the boot memory region then * return the relocated address that points to the dump region reserved * for saving initial boot memory contents. @@ -1072,36 +987,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr) return raddr; } -static int fadump_create_elfcore_headers(char *bufp) +static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, + u64 size, unsigned long long offset) { - unsigned long long raddr, offset; - struct elf_phdr *phdr; + phdr->p_align = 0; + phdr->p_memsz = size; + phdr->p_filesz = size; + phdr->p_paddr = start; + phdr->p_offset = offset; + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long)__va(start); +} + +static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) +{ + char *bufp; struct elfhdr *elf; - int i, j; + struct elf_phdr *phdr; + u64 boot_mem_dest_offset; + unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; + bufp = (char *) fw_dump.elfcorehdr_addr; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* - * setup ELF PT_NOTE, place holder for cpu notes info. The notes info - * will be populated during second kernel boot after crash. Hence - * this PT_NOTE will always be the first elf note. + * Set up ELF PT_NOTE, a placeholder for CPU notes information. + * The notes info will be populated later by platform-specific code. + * Hence, this PT_NOTE will always be the first ELF note. * * NOTE: Any new ELF note addition should be placed after this note. */ phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); phdr->p_type = PT_NOTE; - phdr->p_flags = 0; - phdr->p_vaddr = 0; - phdr->p_align = 0; - - phdr->p_offset = 0; - phdr->p_paddr = 0; - phdr->p_filesz = 0; - phdr->p_memsz = 0; - + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + /* Increment number of program headers. */ (elf->e_phnum)++; /* setup ELF PT_NOTE for vmcoreinfo */ @@ -1111,55 +1040,66 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = 0; phdr->p_vaddr = 0; phdr->p_align = 0; - - phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); - phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; - + phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; + phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; /* Increment number of program headers. */ (elf->e_phnum)++; - /* setup PT_LOAD sections. */ - j = 0; - offset = 0; - raddr = fw_dump.boot_mem_addr[0]; - for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) { - u64 mbase, msize; - - mbase = crash_mrange_info.mem_ranges[i].base; - msize = crash_mrange_info.mem_ranges[i].size; - if (!msize) - continue; - + /* + * Setup PT_LOAD sections. first include boot memory regions + * and then add rest of the memory regions. + */ + boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; + for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mbase; - - if (mbase == raddr) { - /* - * The entire real memory region will be moved by - * firmware to the specified destination_address. - * Hence set the correct offset. - */ - phdr->p_offset = fw_dump.boot_mem_dest_addr + offset; - if (j < (fw_dump.boot_mem_regs_cnt - 1)) { - offset += fw_dump.boot_mem_sz[j]; - raddr = fw_dump.boot_mem_addr[++j]; - } + populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], + fw_dump.boot_mem_sz[i], + boot_mem_dest_offset); + /* Increment number of program headers. */ + (elf->e_phnum)++; + boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; + } + + /* Memory reserved for fadump in first kernel */ + ra_start = fw_dump.reserve_dump_area_start; + ra_size = get_fadump_area_size(); + ra_end = ra_start + ra_size; + + phdr = (struct elf_phdr *)bufp; + for_each_mem_range(i, &mstart, &mend) { + /* Boot memory regions already added, skip them now */ + if (mstart < fw_dump.boot_mem_top) { + if (mend > fw_dump.boot_mem_top) + mstart = fw_dump.boot_mem_top; + else + continue; } - phdr->p_paddr = mbase; - phdr->p_vaddr = (unsigned long)__va(mbase); - phdr->p_filesz = msize; - phdr->p_memsz = msize; - phdr->p_align = 0; + /* Handle memblock regions overlaps with fadump reserved area */ + if ((ra_start < mend) && (ra_end > mstart)) { + if ((mstart < ra_start) && (mend > ra_end)) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + /* Increment number of program headers. */ + (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *)bufp; + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } else if (mstart < ra_start) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + } else if (ra_end < mend) { + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } + } else { + /* No overlap with fadump reserved memory region */ + populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); + } /* Increment number of program headers. */ (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *) bufp; } - return 0; } static unsigned long init_fadump_header(unsigned long addr) @@ -1174,14 +1114,25 @@ static unsigned long init_fadump_header(unsigned long addr) memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; - fdh->elfcorehdr_addr = addr; + fdh->version = FADUMP_HEADER_VERSION; /* We will set the crashing cpu id in crash_fadump() during crash. */ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; + + /* + * The physical address and size of vmcoreinfo are required in the + * second kernel to prepare elfcorehdr. + */ + fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); + fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; + + + fdh->pt_regs_sz = sizeof(struct pt_regs); /* * When LPAR is terminated by PYHP, ensure all possible CPUs' * register data is processed while exporting the vmcore. */ fdh->cpu_mask = *cpu_possible_mask; + fdh->cpu_mask_sz = sizeof(struct cpumask); return addr; } @@ -1189,8 +1140,6 @@ static unsigned long init_fadump_header(unsigned long addr) static int register_fadump(void) { unsigned long addr; - void *vaddr; - int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1199,18 +1148,10 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - ret = fadump_setup_crash_memory_ranges(); - if (ret) - return ret; - addr = fw_dump.fadumphdr_addr; /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); - vaddr = __va(addr); - - pr_debug("Creating ELF core headers at %#016lx\n", addr); - fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -1229,7 +1170,6 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fw_dump.ops->fadump_unregister(&fw_dump); - fadump_free_mem_ranges(&crash_mrange_info); } if (fw_dump.ops->fadump_cleanup) @@ -1415,17 +1355,31 @@ static void fadump_release_memory(u64 begin, u64 end) fadump_release_reserved_area(tstart, end); } -static void fadump_invalidate_release_mem(void) +static void fadump_free_elfcorehdr_buf(void) { - mutex_lock(&fadump_mutex); - if (!fw_dump.dump_active) { - mutex_unlock(&fadump_mutex); + if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) return; - } - fadump_cleanup(); - mutex_unlock(&fadump_mutex); + /* + * Before freeing the memory of `elfcorehdr`, reset the global + * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing + * invalid memory. + */ + elfcorehdr_addr = ELFCORE_ADDR_ERR; + fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); + fw_dump.elfcorehdr_addr = 0; + fw_dump.elfcorehdr_size = 0; +} +static void fadump_invalidate_release_mem(void) +{ + scoped_guard(mutex, &fadump_mutex) { + if (!fw_dump.dump_active) + return; + fadump_cleanup(); + } + + fadump_free_elfcorehdr_buf(); fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); fadump_free_cpu_notes_buf(); @@ -1483,6 +1437,18 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +/* + * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates + * to usersapce that fadump re-registration is not required on memory + * hotplug events. + */ +static ssize_t hotplug_ready_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 1); +} + static ssize_t mem_reserved_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1497,6 +1463,43 @@ static ssize_t registered_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.dump_registered); } +static ssize_t bootargs_append_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); +} + +static ssize_t bootargs_append_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char *params; + + if (!fw_dump.fadump_enabled || fw_dump.dump_active) + return -EPERM; + + if (count >= COMMAND_LINE_SIZE) + return -EINVAL; + + /* + * Fail here instead of handling this scenario with + * some silly workaround in capture kernel. + */ + if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { + pr_err("Appending parameters exceeds cmdline size!\n"); + return -ENOSPC; + } + + params = __va(fw_dump.param_area); + strscpy_pad(params, buf, COMMAND_LINE_SIZE); + /* Remove newline character at the end. */ + if (params[count-1] == '\n') + params[count-1] = '\0'; + + return count; +} + static ssize_t registered_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -1555,11 +1558,14 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); +static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); +static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append); static struct attribute *fadump_attrs[] = { &enable_attr.attr, ®ister_attr.attr, &mem_reserved_attr.attr, + &hotplug_ready_attr.attr, NULL, }; @@ -1577,6 +1583,12 @@ static void __init fadump_init_files(void) return; } + if (fw_dump.param_area) { + rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr); + if (rc) + pr_err("unable to create bootargs_append sysfs file (%d)\n", rc); + } + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); @@ -1631,6 +1643,153 @@ static void __init fadump_init_files(void) return; } +static int __init fadump_setup_elfcorehdr_buf(void) +{ + int elf_phdr_cnt; + unsigned long elfcorehdr_size; + + /* + * Program header for CPU notes comes first, followed by one for + * vmcoreinfo, and the remaining program headers correspond to + * memory regions. + */ + elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); + elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); + elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); + + fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); + if (!fw_dump.elfcorehdr_addr) { + pr_err("Failed to allocate %lu bytes for elfcorehdr\n", + elfcorehdr_size); + return -ENOMEM; + } + fw_dump.elfcorehdr_size = elfcorehdr_size; + return 0; +} + +/* + * Check if the fadump header of crashed kernel is compatible with fadump kernel. + * + * It checks the magic number, endianness, and size of non-primitive type + * members of fadump header to ensure safe dump collection. + */ +static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) +{ + if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { + pr_err("Old magic number, can't process the dump.\n"); + return false; + } + + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) + pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); + else + pr_err("Fadump header is corrupted.\n"); + + return false; + } + + /* + * Dump collection is not safe if the size of non-primitive type members + * of the fadump header do not match between crashed and fadump kernel. + */ + if (fdh->pt_regs_sz != sizeof(struct pt_regs) || + fdh->cpu_mask_sz != sizeof(struct cpumask)) { + pr_err("Fadump header size mismatch.\n"); + return false; + } + + return true; +} + +static void __init fadump_process(void) +{ + struct fadump_crash_info_header *fdh; + + fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); + if (!fdh) { + pr_err("Crash info header is empty.\n"); + goto err_out; + } + + /* Avoid processing the dump if fadump header isn't compatible */ + if (!is_fadump_header_compatible(fdh)) + goto err_out; + + /* Allocate buffer for elfcorehdr */ + if (fadump_setup_elfcorehdr_buf()) + goto err_out; + + fadump_populate_elfcorehdr(fdh); + + /* Let platform update the CPU notes in elfcorehdr */ + if (fw_dump.ops->fadump_process(&fw_dump) < 0) + goto err_out; + + /* + * elfcorehdr is now ready to be exported. + * + * set elfcorehdr_addr so that vmcore module will export the + * elfcorehdr through '/proc/vmcore'. + */ + elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); + return; + +err_out: + fadump_invalidate_release_mem(); +} + +/* + * Reserve memory to store additional parameters to be passed + * for fadump/capture kernel. + */ +void __init fadump_setup_param_area(void) +{ + phys_addr_t range_start, range_end; + + if (!fw_dump.fadump_enabled) + return; + + if (!fw_dump.param_area_supported || fw_dump.dump_active) + return; + + /* This memory can't be used by PFW or bootloader as it is shared across kernels */ + if (early_radix_enabled()) { + /* + * Anywhere in the upper half should be good enough as all memory + * is accessible in real mode. + */ + range_start = memblock_end_of_DRAM() / 2; + range_end = memblock_end_of_DRAM(); + } else { + /* + * Memory range for passing additional parameters for HASH MMU + * must meet the following conditions: + * 1. The first memory block size must be higher than the + * minimum RMA (MIN_RMA) size. Bootloader can use memory + * upto RMA size. So it should be avoided. + * 2. The range should be between MIN_RMA and RMA size (ppc64_rma_size) + * 3. It must not overlap with the fadump reserved area. + */ + if (ppc64_rma_size < MIN_RMA*1024*1024) + return; + + range_start = MIN_RMA * 1024 * 1024; + range_end = min(ppc64_rma_size, fw_dump.boot_mem_top); + } + + fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, + COMMAND_LINE_SIZE, + range_start, + range_end); + if (!fw_dump.param_area) { + pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); + return; + } + + memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE); +} + /* * Prepare for firmware-assisted dump. */ @@ -1650,12 +1809,7 @@ int __init setup_fadump(void) * saving it to the disk. */ if (fw_dump.dump_active) { - /* - * if dump process fails then invalidate the registration - * and release memory before proceeding for re-registration. - */ - if (fw_dump.ops->fadump_process(&fw_dump) < 0) - fadump_invalidate_release_mem(); + fadump_process(); } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { @@ -1734,8 +1888,3 @@ static void __init fadump_reserve_crash_area(u64 base) memblock_reserve(mstart, msize); } } - -unsigned long __init arch_reserved_kernel_pages(void) -{ - return memblock_reserved_size() / PAGE_SIZE; -} diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 20328f72f9f2..8987eee33dc8 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -23,6 +23,8 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) DEFINE_STATIC_KEY_FALSE(kvm_guest); +EXPORT_SYMBOL_GPL(kvm_guest); + int __init check_kvm_guest(void) { struct device_node *hyper_node; diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index f71f2bbd4de6..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -9,6 +9,7 @@ * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). */ +#include <linux/export.h> #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -18,11 +19,19 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index f8e2911478a7..9cba7dbf58dd 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -21,17 +21,9 @@ mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr -#ifdef CONFIG_40x - mfspr r11, SPRN_DEAR -#else mfspr r11, SPRN_DAR -#endif stw r11, DAR(r10) -#ifdef CONFIG_40x - mfspr r11, SPRN_ESR -#else mfspr r11, SPRN_DSISR -#endif stw r11, DSISR(r10) .endif mfspr r11, SPRN_SRR0 @@ -96,9 +88,7 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#ifdef CONFIG_40x - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#elif defined(CONFIG_PPC_8xx) +#ifdef CONFIG_PPC_8xx mtspr SPRN_EID, r2 /* Set MSR_RI */ #else li r10, MSR_KERNEL /* can take exceptions */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S deleted file mode 100644 index 3f68a1624646..000000000000 --- a/arch/powerpc/kernel/head_40x.S +++ /dev/null @@ -1,721 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> - * Initial PowerPC version. - * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> - * Rewritten for PReP - * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> - * Low-level exception handers, MMU support, and rewrite. - * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> - * PowerPC 8xx modifications. - * Copyright (c) 1998-1999 TiVo, Inc. - * PowerPC 403GCX modifications. - * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> - * PowerPC 403GCX/405GP modifications. - * Copyright 2000 MontaVista Software Inc. - * PPC405 modifications - * PowerPC 403GCX/405GP modifications. - * Author: MontaVista Software, Inc. - * frank_rowand@mvista.com or source@mvista.com - * debbie_chu@mvista.com - * - * Module name: head_4xx.S - * - * Description: - * Kernel execution entry point code. - */ - -#include <linux/init.h> -#include <linux/pgtable.h> -#include <linux/sizes.h> -#include <linux/linkage.h> - -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> -#include <asm/export.h> - -#include "head_32.h" - -/* As with the other PowerPC ports, it is expected that when code - * execution begins here, the following registers contain valid, yet - * optional, information: - * - * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) - * r4 - Starting address of the init RAM disk - * r5 - Ending address of the init RAM disk - * r6 - Start of kernel command line string (e.g. "mem=96m") - * r7 - End of kernel command line string - * - * This is all going to change RSN when we add bi_recs....... -- Dan - */ - __HEAD -_GLOBAL(_stext); -_GLOBAL(_start); - - mr r31,r3 /* save device tree ptr */ - - /* We have to turn on the MMU right away so we get cache modes - * set correctly. - */ - bl initial_mmu - -/* We now have the lower 16 Meg mapped into TLB entries, and the caches - * ready to work. - */ -turn_on_mmu: - lis r0,MSR_KERNEL@h - ori r0,r0,MSR_KERNEL@l - mtspr SPRN_SRR1,r0 - lis r0,start_here@h - ori r0,r0,start_here@l - mtspr SPRN_SRR0,r0 - rfi /* enables MMU */ - b . /* prevent prefetch past rfi */ - -/* - * This area is used for temporarily saving registers during the - * critical exception prolog. - */ - . = 0xc0 -crit_save: -_GLOBAL(crit_r10) - .space 4 -_GLOBAL(crit_r11) - .space 4 -_GLOBAL(crit_srr0) - .space 4 -_GLOBAL(crit_srr1) - .space 4 -_GLOBAL(crit_r1) - .space 4 -_GLOBAL(crit_dear) - .space 4 -_GLOBAL(crit_esr) - .space 4 - -/* - * Exception prolog for critical exceptions. This is a little different - * from the normal exception prolog above since a critical exception - * can potentially occur at any point during normal exception processing. - * Thus we cannot use the same SPRG registers as the normal prolog above. - * Instead we use a couple of words of memory at low physical addresses. - * This is OK since we don't support SMP on these processors. - */ -.macro CRITICAL_EXCEPTION_PROLOG trapno name - stw r10,crit_r10@l(0) /* save two registers to work with */ - stw r11,crit_r11@l(0) - mfspr r10,SPRN_SRR0 - mfspr r11,SPRN_SRR1 - stw r10,crit_srr0@l(0) - stw r11,crit_srr1@l(0) - mfspr r10,SPRN_DEAR - mfspr r11,SPRN_ESR - stw r10,crit_dear@l(0) - stw r11,crit_esr@l(0) - mfcr r10 /* save CR in r10 for now */ - mfspr r11,SPRN_SRR3 /* check whether user or kernel */ - andi. r11,r11,MSR_PR - lis r11,(critirq_ctx-PAGE_OFFSET)@ha - lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11) - beq 1f - /* COMING FROM USER MODE */ - mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ - lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ -1: stw r1,crit_r1@l(0) - addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */ - mtspr SPRN_SRR1, r11 - lis r11, 1f@h - ori r11, r11, 1f@l - mtspr SPRN_SRR0, r11 - rfi - - .text -1: -\name\()_virt: - lwz r11,crit_r1@l(0) - stw r11,GPR1(r1) - stw r11,0(r1) - mr r11,r1 - stw r10,_CCR(r11) /* save various registers */ - stw r12,GPR12(r11) - stw r9,GPR9(r11) - mflr r10 - stw r10,_LINK(r11) - lis r9,PAGE_OFFSET@ha - lwz r10,crit_r10@l(r9) - lwz r12,crit_r11@l(r9) - stw r10,GPR10(r11) - stw r12,GPR11(r11) - lwz r12,crit_dear@l(r9) - lwz r9,crit_esr@l(r9) - stw r12,_DEAR(r11) /* since they may have had stuff */ - stw r9,_ESR(r11) /* exception was taken */ - mfspr r12,SPRN_SRR2 - mfspr r9,SPRN_SRR3 - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ - COMMON_EXCEPTION_PROLOG_END \trapno + 2 -_ASM_NOKPROBE_SYMBOL(\name\()_virt) -.endm - - /* - * State at this point: - * r9 saved in stack frame, now saved SRR3 & ~MSR_WE - * r10 saved in crit_r10 and in stack frame, trashed - * r11 saved in crit_r11 and in stack frame, - * now phys stack/exception frame pointer - * r12 saved in stack frame, now saved SRR2 - * CR saved in stack frame, CR0.EQ = !SRR3.PR - * LR, DEAR, ESR in stack frame - * r1 saved in stack frame, now virt stack/excframe pointer - * r0, r3-r8 saved in stack frame - */ - -/* - * Exception vectors. - */ -#define CRITICAL_EXCEPTION(n, label, hdlr) \ - START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG n label; \ - prepare_transfer_to_handler; \ - bl hdlr; \ - b ret_from_crit_exc - -/* - * 0x0100 - Critical Interrupt Exception - */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) - -/* - * 0x0200 - Machine Check Exception - */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) - -/* - * 0x0300 - Data Storage Exception - * This happens for just a few reasons. U0 set (but we don't do that), - * or zone protection fault (user violation, write to protected page). - * The other Data TLB exceptions bail out to this point - * if they can't resolve the lightweight TLB fault. - */ - START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 - prepare_transfer_to_handler - bl do_page_fault - b interrupt_return - -/* - * 0x0400 - Instruction Storage Exception - * This is caused by a fetch from non-execute or guarded pages. - */ - START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG 0x400 InstructionAccess - li r5,0 - stw r5, _ESR(r11) /* Zero ESR */ - stw r12, _DEAR(r11) /* SRR0 as DEAR */ - prepare_transfer_to_handler - bl do_page_fault - b interrupt_return - -/* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) - -/* 0x0600 - Alignment Exception */ - START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - prepare_transfer_to_handler - bl alignment_exception - REST_NVGPRS(r1) - b interrupt_return - -/* 0x0700 - Program Exception */ - START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 - prepare_transfer_to_handler - bl program_check_exception - REST_NVGPRS(r1) - b interrupt_return - - EXCEPTION(0x0800, Trap_08, unknown_exception) - EXCEPTION(0x0900, Trap_09, unknown_exception) - EXCEPTION(0x0A00, Trap_0A, unknown_exception) - EXCEPTION(0x0B00, Trap_0B, unknown_exception) - -/* 0x0C00 - System Call Exception */ - START_EXCEPTION(0x0C00, SystemCall) - SYSCALL_ENTRY 0xc00 -/* Trap_0D is commented out to get more space for system call exception */ - -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception) - EXCEPTION(0x0F00, Trap_0F, unknown_exception) - -/* 0x1000 - Programmable Interval Timer (PIT) Exception */ - START_EXCEPTION(0x1000, DecrementerTrap) - b Decrementer - -/* 0x1010 - Fixed Interval Timer (FIT) Exception */ - START_EXCEPTION(0x1010, FITExceptionTrap) - b FITException - -/* 0x1020 - Watchdog Timer (WDT) Exception */ - START_EXCEPTION(0x1020, WDTExceptionTrap) - b WDTException - -/* 0x1100 - Data TLB Miss Exception - * As the name implies, translation is not in the MMU, so search the - * page tables and fix it. The only purpose of this function is to - * load TLB entries from the page table if they exist. - */ - START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r12 - mfspr r9, SPRN_PID - rlwimi r12, r9, 0, 0xff - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -#ifdef CONFIG_PPC_KUAP - rlwinm. r9, r9, 0, 0xff - beq 5f /* Kuap fault */ -#endif -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ - beq 2f /* Bail if no table */ - - rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED - andc. r9, r9, r11 /* Check permission */ - bne 5f - - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ - - /* Create TLB tag. This is the faulting address plus a static - * set of bits. These are size, valid, E, U0. - */ - li r9, 0x00c0 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r11, 2, 22, 24 - beq 5f - - /* Create TLB tag. This is the faulting address, plus a static - * set of bits (valid, E, U0) plus the size from the PMD. - */ - ori r9, r9, 0x40 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -5: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - b DataStorage - -/* 0x1200 - Instruction TLB Miss Exception - * Nearly the same as above, except we get our information from different - * registers and bailout to a different point. - */ - START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r12 - mfspr r9, SPRN_PID - rlwimi r12, r9, 0, 0xff - mfspr r10, SPRN_SRR0 /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -#ifdef CONFIG_PPC_KUAP - rlwinm. r9, r9, 0, 0xff - beq 5f /* Kuap fault */ -#endif -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ - beq 2f /* Bail if no table */ - - rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC - andc. r9, r9, r11 /* Check permission */ - bne 5f - - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ - - /* Create TLB tag. This is the faulting address plus a static - * set of bits. These are size, valid, E, U0. - */ - li r9, 0x00c0 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r11, 2, 22, 24 - beq 5f - - /* Create TLB tag. This is the faulting address, plus a static - * set of bits (valid, E, U0) plus the size from the PMD. - */ - ori r9, r9, 0x40 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -5: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - b InstructionAccess - - EXCEPTION(0x1300, Trap_13, unknown_exception) - EXCEPTION(0x1400, Trap_14, unknown_exception) - EXCEPTION(0x1500, Trap_15, unknown_exception) - EXCEPTION(0x1600, Trap_16, unknown_exception) - EXCEPTION(0x1700, Trap_17, unknown_exception) - EXCEPTION(0x1800, Trap_18, unknown_exception) - EXCEPTION(0x1900, Trap_19, unknown_exception) - EXCEPTION(0x1A00, Trap_1A, unknown_exception) - EXCEPTION(0x1B00, Trap_1B, unknown_exception) - EXCEPTION(0x1C00, Trap_1C, unknown_exception) - EXCEPTION(0x1D00, Trap_1D, unknown_exception) - EXCEPTION(0x1E00, Trap_1E, unknown_exception) - EXCEPTION(0x1F00, Trap_1F, unknown_exception) - -/* Check for a single step debug exception while in an exception - * handler before state has been saved. This is to catch the case - * where an instruction that we are trying to single step causes - * an exception (eg ITLB/DTLB miss) and thus the first instruction of - * the exception handler generates a single step debug exception. - * - * If we get a debug trap on the first instruction of an exception handler, - * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is - * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). - * The exception handler was handling a non-critical interrupt, so it will - * save (and later restore) the MSR via SPRN_SRR1, which will still have - * the MSR_DE bit set. - */ - /* 0x2000 - Debug Exception */ - START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap - - /* - * If this is a single step or branch-taken exception in an - * exception entry sequence, it was probably meant to apply to - * the code where the exception occurred (since exception entry - * doesn't turn off DE automatically). We simulate the effect - * of turning off DE on entry to an exception handler by turning - * off DE in the SRR3 value and clearing the debug status. - */ - mfspr r10,SPRN_DBSR /* check single-step/branch taken */ - andis. r10,r10,DBSR_IC@h - beq+ 2f - - andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */ - beq 1f /* branch and fix it up */ - - mfspr r10,SPRN_SRR2 /* Faulting instruction address */ - cmplwi r10,0x2100 - bgt+ 2f /* address above exception vectors */ - - /* here it looks like we got an inappropriate debug exception. */ -1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */ - lis r10,DBSR_IC@h /* clear the IC event */ - mtspr SPRN_DBSR,r10 - /* restore state and get out */ - lwz r10,_CCR(r11) - lwz r0,GPR0(r11) - lwz r1,GPR1(r11) - mtcrf 0x80,r10 - mtspr SPRN_SRR2,r12 - mtspr SPRN_SRR3,r9 - lwz r9,GPR9(r11) - lwz r12,GPR12(r11) - lwz r10,crit_r10@l(0) - lwz r11,crit_r11@l(0) - rfci - b . - - /* continue normal handling for a critical exception... */ -2: mfspr r4,SPRN_DBSR - stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - prepare_transfer_to_handler - bl DebugException - b ret_from_crit_exc - - /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ - __HEAD -Decrementer: - EXCEPTION_PROLOG 0x1000 Decrementer - lis r0,TSR_PIS@h - mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - prepare_transfer_to_handler - bl timer_interrupt - b interrupt_return - - /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ - __HEAD -FITException: - EXCEPTION_PROLOG 0x1010 FITException - prepare_transfer_to_handler - bl unknown_exception - b interrupt_return - - /* Watchdog Timer (WDT) Exception. (from 0x1020) */ - __HEAD -WDTException: - CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException - prepare_transfer_to_handler - bl WatchdogException - b ret_from_crit_exc - -/* Other PowerPC processors, namely those derived from the 6xx-series - * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. - * However, for the 4xx-series processors these are neither defined nor - * reserved. - */ - - __HEAD - /* Damn, I came up one instruction too many to fit into the - * exception space :-). Both the instruction and data TLB - * miss get to this point to load the TLB. - * r10 - TLB_TAG value - * r11 - Linux PTE - * r9 - available to use - * PID - loaded with proper value when we get here - * Upon exit, we reload everything and RFI. - * Actually, it will fit now, but oh well.....a common place - * to load the TLB. - */ -tlb_4xx_index: - .long 0 -finish_tlb_load: - /* - * Clear out the software-only bits in the PTE to generate the - * TLB_DATA value. These are the bottom 2 bits of the RPM, the - * top 3 bits of the zone field, and M. - */ - li r9, 0x0ce2 - andc r11, r11, r9 - - /* load the next available TLB index. */ - lwz r9, tlb_4xx_index@l(0) - addi r9, r9, 1 - andi. r9, r9, PPC40X_TLB_SIZE - 1 - stw r9, tlb_4xx_index@l(0) - - tlbwe r11, r9, TLB_DATA /* Load TLB LO */ - tlbwe r10, r9, TLB_TAG /* Load TLB HI */ - - /* Done...restore registers and get out of here. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - rfi /* Should sync shadow TLBs */ - b . /* prevent prefetch past rfi */ - -/* This is where the main kernel code starts. - */ -start_here: - - /* ptr to current */ - lis r2,init_task@h - ori r2,r2,init_task@l - - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG_THREAD,r4 - - /* stack */ - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) - - bl early_init /* We have to do this with MMU on */ - -/* - * Decide what sort of machine this is and initialize the MMU. - */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif - li r3,0 - mr r4,r31 - bl machine_init - bl MMU_init - -/* Go back to running unmapped so we can load up new values - * and change to using our exception vectors. - * On the 4xx, all we have to do is invalidate the TLB to clear - * the old 16M byte TLB mappings. - */ - lis r4,2f@h - ori r4,r4,2f@l - tophys(r4,r4) - lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h - ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - rfi - b . /* prevent prefetch past rfi */ - -/* Load up the kernel context */ -2: - sync /* Flush to memory before changing TLB */ - tlbia - isync /* Flush shadow TLBs */ - - /* set up the PTE pointers for the Abatron bdiGDB. - */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(0) /* Must match your Abatron config file */ - tophys(r5,r5) - stw r6, 0(r5) - -/* Now turn on the MMU for real! */ - lis r4,MSR_KERNEL@h - ori r4,r4,MSR_KERNEL@l - lis r3,start_kernel@h - ori r3,r3,start_kernel@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - rfi /* enable MMU and jump to start_kernel */ - b . /* prevent prefetch past rfi */ - -/* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 32 MBytes of memory 1:1 - * virtual to physical and more importantly sets the cache mode. - */ -SYM_FUNC_START_LOCAL(initial_mmu) - tlbia /* Invalidate all TLB entries */ - isync - - /* We should still be executing code at physical address 0x0000xxxx - * at this point. However, start_here is at virtual address - * 0xC000xxxx. So, set up a TLB mapping to cover this once - * translation is enabled. - */ - - lis r3,KERNELBASE@h /* Load the kernel virtual address */ - ori r3,r3,KERNELBASE@l - tophys(r4,r3) /* Load the kernel physical address */ - - iccci r0,r3 /* Invalidate the i-cache before use */ - - /* Load the kernel PID. - */ - li r0,0 - mtspr SPRN_PID,r0 - sync - - /* Configure and load one entry into TLB slots 63 */ - clrrwi r4,r4,10 /* Mask off the real page number */ - ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ - - clrrwi r3,r3,10 /* Mask off the effective page number */ - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) - - li r0,63 /* TLB slot 63 */ - - tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ - tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ - - li r0,62 /* TLB slot 62 */ - addis r4,r4,SZ_16M@h - addis r3,r3,SZ_16M@h - tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ - tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ - - isync - - /* Establish the exception vector base - */ - lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */ - tophys(r0,r4) /* Use the physical address */ - mtspr SPRN_EVPR,r0 - - blr -SYM_FUNC_END(initial_mmu) - -_GLOBAL(abort) - mfspr r13,SPRN_DBCR0 - oris r13,r13,DBCR0_RST_SYSTEM@h - mtspr SPRN_DBCR0,r13 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 63a85c16fef4..25642e802ed3 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -35,7 +35,6 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/synch.h> -#include <asm/export.h> #include <asm/code-patching-asm.h> #include "head_booke.h" @@ -315,8 +314,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -343,7 +342,7 @@ interrupt_base: mtspr SPRN_MMUCR,r12 /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -356,7 +355,7 @@ interrupt_base: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -429,8 +428,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -516,6 +515,7 @@ interrupt_base: * r11 - PTE high word value * r12 - PTE low word value * r13 - TLB index + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -534,11 +534,10 @@ finish_tlb_load_44x: tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ /* And WS 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ @@ -569,8 +568,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -587,7 +586,7 @@ finish_tlb_load_44x: 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -600,7 +599,7 @@ finish_tlb_load_44x: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -670,8 +669,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -745,6 +744,7 @@ finish_tlb_load_44x: * r11 - PTE high word value * r12 - PTE low word value * r13 - free to use + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -754,11 +754,10 @@ finish_tlb_load_47x: tlbwe r11,r13,1 /* And make up word 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 7558ba4eb864..63432a33ec49 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,7 +40,6 @@ #include <asm/hw_irq.h> #include <asm/cputhreads.h> #include <asm/ppc-opcode.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #ifdef CONFIG_PPC_BOOK3S #include <asm/exception-64s.h> @@ -76,6 +75,13 @@ * 2. The kernel is entered at __start */ +/* + * boot_from_prom and prom_init run at the physical address. Everything + * after prom and kexec entry run at the virtual address (PAGE_OFFSET). + * Secondaries run at the virtual address from generic_secondary_common_init + * onward. + */ + OPEN_FIXED_SECTION(first_256B, 0x0, 0x100) USE_FIXED_SECTION(first_256B) /* @@ -160,12 +166,8 @@ __secondary_hold: std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync - li r26,0 -#ifdef CONFIG_PPC_BOOK3E_64 - tovirt(r26,r26) -#endif /* All secondary cpus wait here until told to start. */ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0) cmpdi 0,r12,0 beq 100b @@ -307,13 +309,11 @@ _GLOBAL(fsl_secondary_thread_init) /* turn on 64-bit mode */ bl enable_64b_mode - /* get a valid TOC pointer, wherever we're mapped at */ - bl relative_toc - tovirt(r2,r2) - /* Book3E initialization */ mr r3,r24 bl book3e_secondary_thread_init + bl relative_toc + b generic_secondary_common_init #endif /* CONFIG_PPC_BOOK3E_64 */ @@ -329,22 +329,24 @@ _GLOBAL(fsl_secondary_thread_init) */ _GLOBAL(generic_secondary_smp_init) FIXUP_ENDIAN + + li r13,0 + + /* Poison TOC */ + li r2,-1 + mr r24,r3 mr r25,r4 /* turn on 64-bit mode */ bl enable_64b_mode - /* get a valid TOC pointer, wherever we're mapped at */ - bl relative_toc - tovirt(r2,r2) - #ifdef CONFIG_PPC_BOOK3E_64 /* Book3E initialization */ mr r3,r24 mr r4,r25 bl book3e_secondary_core_init - + /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */ /* * After common core init has finished, check if the current thread is the * one we wanted to boot. If not, start the specified thread and stop the @@ -372,8 +374,7 @@ _GLOBAL(generic_secondary_smp_init) beq 20f /* start the specified thread */ - LOAD_REG_ADDR(r5, fsl_secondary_thread_init) - ld r4, 0(r5) + LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init)) bl book3e_start_thread /* stop the current thread */ @@ -382,6 +383,16 @@ _GLOBAL(generic_secondary_smp_init) 10: b 10b 20: +#else + /* Now the MMU is off, can branch to our PAGE_OFFSET address */ + bcl 20,31,$+4 +1: mflr r11 + addi r11,r11,(2f - 1b) + tovirt(r11, r11) + mtctr r11 + bctr +2: + bl relative_toc #endif generic_secondary_common_init: @@ -475,8 +486,33 @@ SYM_FUNC_START_LOCAL(__mmu_off) rfid b . /* prevent speculative execution */ SYM_FUNC_END(__mmu_off) -#endif +SYM_FUNC_START_LOCAL(start_initialization_book3s) + mflr r25 + + /* Setup some critical 970 SPRs before switching MMU off */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 /* 970 */ + beq 1f + cmpwi r0,0x3c /* 970FX */ + beq 1f + cmpwi r0,0x44 /* 970MP */ + beq 1f + cmpwi r0,0x45 /* 970GX */ + bne 2f +1: bl __cpu_preinit_ppc970 +2: + + /* Switch off MMU if not already off */ + bl __mmu_off + + /* Now the MMU is off, can return to our PAGE_OFFSET address */ + tovirt(r25,r25) + mtlr r25 + blr +SYM_FUNC_END(start_initialization_book3s) +#endif /* * Here is our main kernel entry point. We support currently 2 kind of entries @@ -496,14 +532,8 @@ __start_initialization_multiplatform: /* Zero r13 (paca) so early program check / mce don't use it */ li r13,0 - /* Get TOC pointer (current runtime address) */ - bl relative_toc - - /* find out where we are now */ - bcl 20,31,$+4 -0: mflr r26 /* r26 = runtime addr here */ - addis r26,r26,(_stext - 0b)@ha - addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + /* Poison TOC */ + li r2,-1 /* * Are we booted from a PROM Of-type client-interface ? @@ -521,32 +551,41 @@ __start_initialization_multiplatform: mr r29,r9 #endif + /* Get TOC pointer (current runtime address) */ + bl relative_toc + + /* These functions return to the virtual (PAGE_OFFSET) address */ #ifdef CONFIG_PPC_BOOK3E_64 bl start_initialization_book3e - b __after_prom_start #else - /* Setup some critical 970 SPRs before switching MMU off */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 /* 970 */ - beq 1f - cmpwi r0,0x3c /* 970FX */ - beq 1f - cmpwi r0,0x44 /* 970MP */ - beq 1f - cmpwi r0,0x45 /* 970GX */ - bne 2f -1: bl __cpu_preinit_ppc970 -2: + bl start_initialization_book3s +#endif /* CONFIG_PPC_BOOK3E_64 */ + + /* Get TOC pointer, virtual */ + bl relative_toc + + /* find out where we are now */ + + /* OPAL doesn't pass base address in r4, have to derive it. */ + bcl 20,31,$+4 +0: mflr r26 /* r26 = runtime addr here */ + addis r26,r26,(_stext - 0b)@ha + addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ - /* Switch off MMU if not already off */ - bl __mmu_off b __after_prom_start -#endif /* CONFIG_PPC_BOOK3E_64 */ __REF __boot_from_prom: #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* Get TOC pointer, non-virtual */ + bl relative_toc + + /* find out where we are now */ + bcl 20,31,$+4 +0: mflr r26 /* r26 = runtime addr here */ + addis r26,r26,(_stext - 0b)@ha + addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + /* Save parameters */ mr r31,r3 mr r30,r4 @@ -576,7 +615,7 @@ __boot_from_prom: /* Do all of the interaction with OF client interface */ mr r8,r26 - bl prom_init + bl CFUNC(prom_init) #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */ /* We never return. We also hit that trap if trying to boot @@ -587,18 +626,11 @@ __boot_from_prom: __after_prom_start: #ifdef CONFIG_RELOCATABLE /* process relocations for the final address of the kernel */ - lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ - sldi r25,r25,32 -#if defined(CONFIG_PPC_BOOK3E_64) - tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ -#endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) -#if defined(CONFIG_PPC_BOOK3E_64) - tophys(r26,r26) -#endif cmplwi cr0,r7,1 /* flagged to stay where we are ? */ - bne 1f - add r25,r25,r26 + mr r25,r26 /* then use current kernel base */ + beq 1f + LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */ 1: mr r3,r25 bl relocate #if defined(CONFIG_PPC_BOOK3E_64) @@ -614,15 +646,10 @@ __after_prom_start: * * Note: This process overwrites the OF exception vectors. */ - li r3,0 /* target addr */ -#ifdef CONFIG_PPC_BOOK3E_64 - tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */ -#endif - mr. r4,r26 /* In some cases the loader may */ -#if defined(CONFIG_PPC_BOOK3E_64) - tovirt(r4,r4) -#endif - beq 9f /* have already put us at zero */ + LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. */ @@ -632,9 +659,6 @@ __after_prom_start: * variable __run_at_load, if it is set the kernel is treated as relocatable * kernel, otherwise it will be moved to PHYSICAL_START */ -#if defined(CONFIG_PPC_BOOK3E_64) - tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ -#endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) cmplwi cr0,r7,1 bne 3f @@ -753,9 +777,15 @@ _GLOBAL(pmac_secondary_start) sync slbia - /* get TOC pointer (real address) */ + /* Branch to our PAGE_OFFSET address */ + bcl 20,31,$+4 +1: mflr r11 + addi r11,r11,(2f - 1b) + tovirt(r11, r11) + mtctr r11 + bctr +2: bl relative_toc - tovirt(r2,r2) /* Copy some CPU settings from CPU 0 */ bl __restore_cpu_ppc970 @@ -814,7 +844,7 @@ __secondary_start: * can turn it on below. This is a call to C, which is OK, we're still * running on the emergency stack. */ - bl early_setup_secondary + bl CFUNC(early_setup_secondary) /* * The primary has initialized our kernel stack for us in the paca, grab @@ -853,7 +883,7 @@ start_secondary_prolog: LOAD_PACA_TOC() li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary + bl CFUNC(start_secondary) b . /* * Reset stack pointer and call start_secondary @@ -864,7 +894,7 @@ _GLOBAL(start_secondary_resume) ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary + bl CFUNC(start_secondary) b . #endif @@ -894,10 +924,15 @@ SYM_FUNC_END(enable_64b_mode) * TOC in -mcmodel=medium mode. After we relocate to 0 but before * the MMU is on we need our TOC to be a virtual address otherwise * these pointers will be real addresses which may get stored and - * accessed later with the MMU on. We use tovirt() at the call - * sites to handle this. + * accessed later with the MMU on. We branch to the virtual address + * while still in real mode then call relative_toc again to handle + * this. */ _GLOBAL(relative_toc) +#ifdef CONFIG_PPC_KERNEL_PCREL + tdnei r2,-1 + blr +#else mflr r0 bcl 20,31,$+4 0: mflr r11 @@ -908,15 +943,15 @@ _GLOBAL(relative_toc) .balign 8 p_toc: .8byte .TOC. - 0b +#endif /* * This is where the main kernel code starts. */ __REF start_here_multiplatform: - /* set up the TOC */ - bl relative_toc - tovirt(r2,r2) + /* Adjust TOC for moved kernel. Could adjust when moving it instead. */ + bl relative_toc /* Clear out the BSS. It may have been done in prom_init, * already but that's irrelevant since prom_init will soon @@ -969,7 +1004,7 @@ start_here_multiplatform: */ #ifdef CONFIG_KASAN - bl kasan_early_init + bl CFUNC(kasan_early_init) #endif /* Restore parameters passed from prom_init/kexec */ mr r3,r31 @@ -1002,7 +1037,7 @@ start_here_common: stb r0,PACAIRQHAPPENED(r13) /* Generic kernel entry */ - bl start_kernel + bl CFUNC(start_kernel) /* Not reached */ 0: trap diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index d438ca74e96c..f9a73fae6464 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -40,7 +40,6 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #include "head_booke.h" @@ -295,9 +294,10 @@ set_ivor: /* Macros to hide the PTE size differences * * FIND_PTE -- walks the page tables given EA & pgdir pointer - * r10 -- EA of fault + * r10 -- free * r11 -- PGDIR pointer * r12 -- free + * r13 -- EA of fault * label 2: is the bailout case * * if we find the pte (fall through): @@ -308,34 +308,34 @@ set_ivor: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_HUGETLB_PAGE #define FIND_PTE \ - rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ - lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ + add r12, r11, r12; \ + lwz r11, 4(r12); /* Get pgd/pmd entry */ \ + rlwinm. r10, r11, 32 - _PAGE_PSIZE_SHIFT, 0x1e; /* get tsize*/ \ + bne 1000f; /* Huge page (leaf entry) */ \ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ - blt 1000f; /* Normal non-huge page */ \ beq 2f; /* Bail if no table */ \ - oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ - andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ - xor r12, r10, r11; /* drop size bits from pointer */ \ - b 1001f; \ -1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ li r10, 0; /* clear r10 */ \ -1001: lwz r11, 4(r12); /* Get pte entry */ + lwz r11, 4(r12); /* Get pte entry */ \ +1000: #else #define FIND_PTE \ - rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ - lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ + add r12, r11, r12; \ + lwz r11, 4(r12); /* Get pgd/pmd entry */ \ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ beq 2f; /* Bail if no table */ \ - rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ #endif /* HUGEPAGE */ #else /* !PTE_64BIT */ #define FIND_PTE \ - rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ + rlwimi r11, r13, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ lwz r11, 0(r11); /* Get L1 entry */ \ rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ beq 2f; /* Bail if no table */ \ - rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \ + rlwimi r12, r13, 22, 20, 29; /* Compute PTE address */ \ lwz r11, 0(r12); /* Get Linux PTE */ #endif @@ -396,7 +396,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt) #endif /* System Call Interrupt */ @@ -442,13 +442,13 @@ START_BTB_FLUSH_SECTION BTB_FLUSH(r10) 1: END_BTB_FLUSH_SECTION - mfspr r10, SPRN_DEAR /* Get faulting address */ + mfspr r13, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw 5, r10, r11 + cmplw 5, r13, r11 blt 5, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -471,29 +471,14 @@ END_BTB_FLUSH_SECTION #endif 4: - /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write - * to an RO page is pretty common, we don't do it with - * _PAGE_DIRTY. We could do it, but it's a fairly rare - * event so I'd rather take the overhead when it happens - * rather than adding an instruction here. We should measure - * whether the whole thing is worth it in the first place - * as we could avoid loading SPRN_ESR completely in the first - * place... - * - * TODO: Is it worth doing that mfspr & rlwimi in the first - * place or can we save a couple of instructions here ? - */ - mfspr r12,SPRN_ESR + FIND_PTE + #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT + li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h #else - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED #endif - rlwimi r13,r12,11,29,29 - - FIND_PTE andc. r13,r13,r11 /* Check permission */ #ifdef CONFIG_PTE_64BIT @@ -550,13 +535,13 @@ START_BTB_FLUSH_SECTION 1: END_BTB_FLUSH_SECTION - mfspr r10, SPRN_SRR0 /* Get faulting address */ + mfspr r13, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw 5, r10, r11 + cmplw 5, r13, r11 blt 5, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -565,6 +550,7 @@ END_BTB_FLUSH_SECTION rlwinm r12,r12,0,16,1 mtspr SPRN_MAS1,r12 + FIND_PTE /* Make up the required permissions for kernel code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_SX @@ -585,6 +571,7 @@ END_BTB_FLUSH_SECTION beq 2f /* KUAP fault */ #endif + FIND_PTE /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -594,7 +581,6 @@ END_BTB_FLUSH_SECTION #endif 4: - FIND_PTE andc. r13,r13,r11 /* Check permission */ #ifdef CONFIG_PTE_64BIT @@ -747,17 +733,12 @@ finish_tlb_load: lwz r15, 0(r14) 100: stw r15, 0(r17) - /* - * Calc MAS1_TSIZE from r10 (which has pshift encoded) - * tlb_enc = (pshift - 10). - */ - subi r15, r10, 10 mfspr r16, SPRN_MAS1 - rlwimi r16, r15, 7, 20, 24 + rlwimi r16, r10, MAS1_TSIZE_SHIFT, MAS1_TSIZE_MASK mtspr SPRN_MAS1, r16 /* copy the pshift for use later */ - mr r14, r10 + addi r14, r10, _PAGE_PSIZE_SHIFT_OFFSET /* fall through */ @@ -784,15 +765,15 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #else - li r10, (_PAGE_EXEC | _PAGE_PRESENT) + li r10, (_PAGE_EXEC | _PAGE_READ) mr r13, r11 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 - andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + mcrf cr0, cr5 /* Test for user page */ slwi r10, r12, 1 or r10, r10, r12 rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ - iseleq r12, r12, r10 + isellt r12, r10, r12 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 #endif @@ -864,7 +845,7 @@ _GLOBAL(load_up_spe) * SPE unavailable trap from kernel - print a message, but let * the task use SPE in the kernel until it returns to user mode. */ -KernelSPE: +SYM_FUNC_START_LOCAL(KernelSPE) lwz r3,_MSR(r1) oris r3,r3,MSR_SPE@h stw r3,_MSR(r1) /* enable use of SPE after return */ @@ -881,6 +862,7 @@ KernelSPE: #endif .align 4,0 +SYM_FUNC_END(KernelSPE) #endif /* CONFIG_SPE */ /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a79751e05781..393e19ee1322 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,7 +29,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/code-patching-asm.h> #include <asm/interrupt.h> @@ -41,16 +40,6 @@ #include "head_32.h" -.macro compare_to_kernel_boundary scratch, addr -#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 -/* By simply checking Address >= 0x80000000, we know if its a kernel address */ - not. \scratch, \addr -#else - rlwinm \scratch, \addr, 16, 0xfff8 - cmpli cr0, \scratch, PAGE_OFFSET@h -#endif -.endm - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -173,7 +162,7 @@ instruction_counter: * For the MPC8xx, this is a software tablewalk to load the instruction * TLB. The task switch loads the M_TWB register with the pointer to the first * level table. - * If we discover there is no second level table (value is zero) or if there + * If there is no second level table (value is zero) or if there * is an invalid pte, we load that into the TLB, which causes another fault * into the TLB Error interrupt where we can handle such problems. * We have to use the MD_xxx registers for the tablewalk because the @@ -194,25 +183,11 @@ instruction_counter: mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef CONFIG_MODULES - mfcr r11 - compare_to_kernel_boundary r10, r10 -#endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef CONFIG_MODULES - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 -#endif - lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + lwz r11, 0(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ @@ -249,20 +224,9 @@ instruction_counter: START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 - mfcr r11 - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - mfspr r10, SPRN_MD_EPN - compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 - lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + lwz r11, 0(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 mfspr r10, SPRN_MD_TWC @@ -333,15 +297,19 @@ instruction_counter: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ + mfspr r11, SPRN_DSISR + rlwinm r11, r11, 0, DSISR_NOHPTE + cmpwi cr1, r11, 0 + beq+ cr1, .Ldtlbie + mfspr r11, SPRN_DAR + tlbie r11 + rlwinm r11, r11, 16, 0xffff + cmplwi cr1, r11, TASK_SIZE@h + bge- cr1, FixupPGD +.Ldtlbie: EXCEPTION_PROLOG_1 /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 - lwz r4, _DAR(r11) - lwz r5, _DSISR(r11) - andis. r10,r5,DSISR_NOHPTE@h - beq+ .Ldtlbie - tlbie r4 -.Ldtlbie: prepare_transfer_to_handler bl do_page_fault b interrupt_return @@ -395,6 +363,30 @@ DARFixed:/* Return from dcbx instruction bug workaround */ __HEAD . = 0x2000 +FixupPGD: + mtspr SPRN_M_TW, r10 + mfspr r10, SPRN_DAR + mtspr SPRN_MD_EPN, r10 + mfspr r11, SPRN_M_TWB /* Get level 1 table */ + lwz r10, 0(r11) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + bne cr1, 1f + + rlwinm r10, r11, 0, 20, 31 + oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r10) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + beq cr1, 1f + stw r10, 0(r11) /* Set the level 1 entry */ + mfspr r10, SPRN_M_TW + mtcr r10 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + rfi +1: + mfspr r10, SPRN_M_TW + b .Ldtlbie + /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. * DAR is set to the calculated address. @@ -405,7 +397,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 mtspr SPRN_MD_EPN, r10 rlwinm r11, r10, 16, 0xfff8 - cmpli cr1, r11, PAGE_OFFSET@h + cmpli cr1, r11, TASK_SIZE@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f @@ -413,17 +405,17 @@ FixupDAR:/* Entry point for dcbx workaround. */ tophys(r11, r10) mfspr r11, SPRN_M_TWB /* Get level 1 table */ rlwinm r11, r11, 0, 20, 31 - oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha + oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir - PAGE_OFFSET)@l 3: - lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + lwz r11, 0(r11) /* Get the level 1 entry */ + rlwinm r11, r11, 0, ~_PMD_PAGE_8M mtspr SPRN_MD_TWC, r11 - mtcrf 0x01, r11 mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ - bt 28,200f /* bit 28 = Large page (8M) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 -201: lwz r11,0(r11) + lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ @@ -442,11 +434,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ 141: mfspr r10,SPRN_M_TW b DARFixed /* Nope, go back to normal TLB processing */ -200: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -501,6 +488,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ bctr /* jump into table */ 152: mfdar r11 + mtdar r10 mtctr r11 /* restore ctr reg from DAR */ mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) @@ -541,7 +529,8 @@ start_here: li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) - lis r6, swapper_pg_dir@ha + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l tophys(r6,r6) mtspr SPRN_M_TWB, r6 @@ -594,6 +583,10 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -690,6 +683,7 @@ SYM_FUNC_START_LOCAL(initial_mmu) blr SYM_FUNC_END(initial_mmu) +#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -711,6 +705,7 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia +#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -731,6 +726,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 +#endif LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA @@ -790,3 +786,4 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi +#endif diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c51f28b5abc0..cb2bca76be53 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,7 +31,6 @@ #include <asm/ptrace.h> #include <asm/bug.h> #include <asm/kvm_book3s_asm.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #include <asm/interrupt.h> @@ -412,44 +411,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) */ . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 -#endif + mfspr r0,SPRN_IMISS mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER - rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -#endif -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + rlwinm r2, r2, 28, 0xfffff000 + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ +#ifdef CONFIG_EXECMEM + rlwinm r3, r0, 4, 0xf + subi r3, r3, (TASK_SIZE >> 28) & 0xf +#endif rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ +#ifdef CONFIG_EXECMEM + rlwimi r2, r3, 1, 31, 31 /* userspace ? -> PP lsb */ +#endif ori r1, r1, 0xe06 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 1 : 0 */ + andc r1, r2, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - tlbli r3 + tlbli r0 mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r3 rfi @@ -478,63 +467,62 @@ InstructionAddressInvalid: */ . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + mfspr r0,SPRN_DMISS mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER - rlwinm r2, r2, 28, 0xfffff000 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_PRESENT | _PAGE_ACCESSED - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ + rlwimi r2,r3,2,30,31 /* userspace ? -> PP */ + rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ + andc r1,r2,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + b 1b DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -558,35 +546,26 @@ DataAddressInvalid: */ . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + mfspr r0,SPRN_DMISS mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER - rlwinm r2, r2, 28, 0xfffff000 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + rlwimi r2,r3,1,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ - andc r1,r0,r1 /* PP = user? 1: 0 */ + andc r1,r2,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -594,26 +573,36 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm r2,r2,0,0,19 /* extract address of pte page */ + b 1b + #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception #endif @@ -690,7 +679,8 @@ hash_page_dsi: mfdar r4 mfsrr0 r5 mfsrr1 r9 - rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */ + ori r3, r3, _PAGE_PRESENT | _PAGE_READ bl hash_page mfspr r10, SPRN_SPRG_THREAD restore_regs_thread r10 @@ -700,7 +690,7 @@ hash_page_isi: mr r11, r10 mfspr r10, SPRN_SPRG_THREAD save_regs_thread r10 - li r3, 0 + li r3, _PAGE_PRESENT | _PAGE_EXEC lwz r4, SRR0(r10) lwz r9, SRR1(r10) bl hash_page diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 37d43c172676..75471fb6fb10 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -5,8 +5,9 @@ #include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#include <asm/thread_info.h> /* for THREAD_SHIFT */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * Macros used for common Book-e exception handling @@ -144,10 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) b transfer_to_syscall /* jump to handler */ .endm -/* To handle the additional exception priority levels on 40x and Book-E +/* To handle the additional exception priority levels on Book-E * processors we allocate a stack per additional priority level. * - * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check * * Additionally we reserve a SPRG for each priority level so we can free up a @@ -522,5 +522,5 @@ label: bl kernel_fp_unavailable_exception; \ b interrupt_return -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index e1b4e70c8fd0..a1318ce18d0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -43,16 +43,6 @@ int hw_breakpoint_slots(int type) return 0; /* no instruction breakpoints available */ } -static bool single_step_pending(void) -{ - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (current->thread.last_hit_ubp[i]) - return true; - } - return false; -} /* * Install a perf counter breakpoint. @@ -84,7 +74,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). */ - if (!single_step_pending()) + if (!info->perf_single_step) __set_breakpoint(i, info); return 0; @@ -124,275 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp) return bp->overflow_handler == ptrace_triggered; } -struct breakpoint { - struct list_head list; - struct perf_event *bp; - bool ptrace_bp; -}; - -/* - * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot - * rely on it safely synchronizing internals here; however, we can rely on it - * not requesting more breakpoints than available. - */ -static DEFINE_SPINLOCK(cpu_bps_lock); -static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); -static DEFINE_SPINLOCK(task_bps_lock); -static LIST_HEAD(task_bps); - -static struct breakpoint *alloc_breakpoint(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return ERR_PTR(-ENOMEM); - tmp->bp = bp; - tmp->ptrace_bp = is_ptrace_bp(bp); - return tmp; -} - -static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) -{ - __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; - - bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); - bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); - - return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); -} - -static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) -{ - return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; -} - -static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) -{ - return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); -} - -static int task_bps_add(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - spin_lock(&task_bps_lock); - list_add(&tmp->list, &task_bps); - spin_unlock(&task_bps_lock); - return 0; -} - -static void task_bps_remove(struct perf_event *bp) -{ - struct list_head *pos, *q; - - spin_lock(&task_bps_lock); - list_for_each_safe(pos, q, &task_bps) { - struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); - - if (tmp->bp == bp) { - list_del(&tmp->list); - kfree(tmp); - break; - } - } - spin_unlock(&task_bps_lock); -} - -/* - * If any task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool all_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - bool ret = false; - - spin_lock(&task_bps_lock); - list_for_each_entry(tmp, &task_bps, list) { - if (!can_co_exist(tmp, bp)) { - ret = true; - break; - } - } - spin_unlock(&task_bps_lock); - return ret; -} - -/* - * If same task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool same_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - bool ret = false; - - spin_lock(&task_bps_lock); - list_for_each_entry(tmp, &task_bps, list) { - if (tmp->bp->hw.target == bp->hw.target && - !can_co_exist(tmp, bp)) { - ret = true; - break; - } - } - spin_unlock(&task_bps_lock); - return ret; -} - -static int cpu_bps_add(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - struct breakpoint *tmp; - int i = 0; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) { - cpu_bp[i] = tmp; - break; - } - } - spin_unlock(&cpu_bps_lock); - return 0; -} - -static void cpu_bps_remove(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - int i = 0; - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) - continue; - - if (cpu_bp[i]->bp == bp) { - kfree(cpu_bp[i]); - cpu_bp[i] = NULL; - break; - } - } - spin_unlock(&cpu_bps_lock); -} - -static bool cpu_bps_check(int cpu, struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - bool ret = false; - int i; - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) { - ret = true; - break; - } - } - spin_unlock(&cpu_bps_lock); - return ret; -} - -static bool all_cpu_bps_check(struct perf_event *bp) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpu_bps_check(cpu, bp)) - return true; - } - return false; -} - -int arch_reserve_bp_slot(struct perf_event *bp) -{ - int ret; - - /* ptrace breakpoint */ - if (is_ptrace_bp(bp)) { - if (all_cpu_bps_check(bp)) - return -ENOSPC; - - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } - - /* perf breakpoint */ - if (is_kernel_addr(bp->attr.bp_addr)) - return 0; - - if (bp->hw.target && bp->cpu == -1) { - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } else if (!bp->hw.target && bp->cpu != -1) { - if (all_task_bps_check(bp)) - return -ENOSPC; - - return cpu_bps_add(bp); - } - - if (same_task_bps_check(bp)) - return -ENOSPC; - - ret = cpu_bps_add(bp); - if (ret) - return ret; - ret = task_bps_add(bp); - if (ret) - cpu_bps_remove(bp); - - return ret; -} - -void arch_release_bp_slot(struct perf_event *bp) -{ - if (!is_kernel_addr(bp->attr.bp_addr)) { - if (bp->hw.target) - task_bps_remove(bp); - if (bp->cpu != -1) - cpu_bps_remove(bp); - } -} - -/* - * Perform cleanup of arch-specific counters during unregistration - * of the perf-event - */ -void arch_unregister_hw_breakpoint(struct perf_event *bp) -{ - /* - * If the breakpoint is unregistered between a hw_breakpoint_handler() - * and the single_step_dabr_instruction(), then cleanup the breakpoint - * restoration variables to prevent dangling pointers. - * FIXME, this should not be using bp->ctx at all! Sayeth peterz. - */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (bp->ctx->task->thread.last_hit_ubp[i] == bp) - bp->ctx->task->thread.last_hit_ubp[i] = NULL; - } - } -} - /* * Check for virtual address in kernel space. */ @@ -499,25 +220,36 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, * Restores the breakpoint on the debug registers. * Invoke this function if it is known that the execution context is * about to change to cause loss of MSR_SE settings. + * + * The perf watchpoint will simply re-trigger once the thread is started again, + * and the watchpoint handler will set up MSR_SE and perf_single_step as + * needed. */ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { struct arch_hw_breakpoint *info; int i; + preempt_disable(); + for (i = 0; i < nr_wp_slots(); i++) { - if (unlikely(tsk->thread.last_hit_ubp[i])) + struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); + + if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } - return; + goto out; reset: regs_set_return_msr(regs, regs->msr & ~MSR_SE); for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); - tsk->thread.last_hit_ubp[i] = NULL; + info->perf_single_step = false; } + +out: + preempt_enable(); } static bool is_larx_stcx_instr(int type) @@ -534,23 +266,22 @@ static bool is_octword_vsx_instr(int type, int size) * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. */ -static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void handler_error(struct perf_event *bp) { WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } -static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void larx_stcx_err(struct perf_event *bp) { printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, - struct arch_hw_breakpoint **info, int *hit, - ppc_inst_t instr) + int *hit, ppc_inst_t instr) { int i; int stepped; @@ -560,8 +291,9 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - current->thread.last_hit_ubp[i] = bp[i]; - info[i] = NULL; + + counter_arch_bp(bp[i])->perf_single_step = true; + bp[i] = NULL; } regs_set_return_msr(regs, regs->msr | MSR_SE); return false; @@ -572,15 +304,15 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; } return false; } return true; } -static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, +static void handle_p10dd1_spurious_exception(struct perf_event **bp, int *hit, unsigned long ea) { int i; @@ -592,10 +324,14 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, * spurious exception. */ for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + struct arch_hw_breakpoint *info; + + if (!bp[i]) continue; - hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + info = counter_arch_bp(bp[i]); + + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); /* * Ending address of DAWR range is less than starting @@ -625,20 +361,24 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, return; for (i = 0; i < nr_wp_slots(); i++) { - if (info[i]) { + if (bp[i]) { hit[i] = 1; - info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; } } } +/* + * Handle a DABR or DAWR exception. + * + * Called in atomic context. + */ int hw_breakpoint_handler(struct die_args *args) { bool err = false; int rc = NOTIFY_STOP; struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; int i; int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; @@ -663,18 +403,20 @@ int hw_breakpoint_handler(struct die_args *args) wp_get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { + struct arch_hw_breakpoint *info; + bp[i] = __this_cpu_read(bp_per_reg[i]); if (!bp[i]) continue; - info[i] = counter_arch_bp(bp[i]); - info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + info = counter_arch_bp(bp[i]); + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { + if (wp_check_constraints(regs, instr, ea, type, size, info)) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; err = 1; continue; } @@ -693,7 +435,7 @@ int hw_breakpoint_handler(struct die_args *args) /* Workaround for Power10 DD1 */ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && is_octword_vsx_instr(type, size)) { - handle_p10dd1_spurious_exception(info, hit, ea); + handle_p10dd1_spurious_exception(bp, hit, ea); } else { rc = NOTIFY_DONE; goto out; @@ -708,10 +450,10 @@ int hw_breakpoint_handler(struct die_args *args) */ if (ptrace_bp) { for (i = 0; i < nr_wp_slots(); i++) { - if (!hit[i]) + if (!hit[i] || !is_ptrace_bp(bp[i])) continue; perf_bp_event(bp[i], regs); - info[i] = NULL; + bp[i] = NULL; } rc = NOTIFY_DONE; goto reset; @@ -722,13 +464,13 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - larx_stcx_err(bp[i], info[i]); - info[i] = NULL; + larx_stcx_err(bp[i]); + bp[i] = NULL; } goto reset; } - if (!stepping_handler(regs, bp, info, hit, instr)) + if (!stepping_handler(regs, bp, hit, instr)) goto reset; } @@ -739,15 +481,15 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp[i], regs); } reset: for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + if (!bp[i]) continue; - __set_breakpoint(i, info[i]); + __set_breakpoint(i, counter_arch_bp(bp[i])); } out: @@ -758,28 +500,34 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. + * + * Called in atomic context. */ static int single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; - struct perf_event *bp = NULL; - struct arch_hw_breakpoint *info; - int i; bool found = false; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - for (i = 0; i < nr_wp_slots(); i++) { - bp = current->thread.last_hit_ubp[i]; + for (int i = 0; i < nr_wp_slots(); i++) { + struct perf_event *bp; + struct arch_hw_breakpoint *info; + + bp = __this_cpu_read(bp_per_reg[i]); if (!bp) continue; - found = true; info = counter_arch_bp(bp); + if (!info->perf_single_step) + continue; + + found = true; + /* * We shall invoke the user-defined callback function in the * single stepping handler to confirm to 'trigger-after-execute' @@ -787,26 +535,16 @@ static int single_step_dabr_instruction(struct die_args *args) */ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - current->thread.last_hit_ubp[i] = NULL; - } - if (!found) - return NOTIFY_DONE; - - for (i = 0; i < nr_wp_slots(); i++) { - bp = __this_cpu_read(bp_per_reg[i]); - if (!bp) - continue; - - info = counter_arch_bp(bp); - __set_breakpoint(i, info); + info->perf_single_step = false; + __set_breakpoint(i, counter_arch_bp(bp)); } /* * If the process was being single-stepped by ptrace, let the * other single-step actions occur (e.g. generate SIGTRAP). */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (!found || test_thread_flag(TIF_SINGLESTEP)) return NOTIFY_DONE; return NOTIFY_STOP; @@ -815,6 +553,8 @@ NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. + * + * Called in atomic context. */ int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index a74623025f3a..9e51801c4915 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -131,8 +131,13 @@ void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; + int err; - if (__get_user_instr(*instr, (void __user *)regs->nip)) + pagefault_disable(); + err = __get_user_instr(*instr, (void __user *)regs->nip); + pagefault_enable(); + + if (err) return; analyse_instr(&op, regs, *instr); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 77cd4c5a2d63..e527cd3ef128 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -51,10 +51,9 @@ void arch_cpu_idle(void) * Some power_save functions return with * interrupts enabled, some don't. */ - if (irqs_disabled()) - raw_local_irq_enable(); + if (!irqs_disabled()) + raw_local_irq_disable(); } else { - raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. @@ -98,7 +97,7 @@ void power4_idle(void) /* * Register the sysctl to set/clear powersave_nap. */ -static struct ctl_table powersave_nap_ctl_table[] = { +static const struct ctl_table powersave_nap_ctl_table[] = { { .procname = "powersave-nap", .data = &powersave_nap, @@ -106,21 +105,12 @@ static struct ctl_table powersave_nap_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - {} -}; -static struct ctl_table powersave_nap_sysctl_root[] = { - { - .procname = "kernel", - .mode = 0555, - .child = powersave_nap_ctl_table, - }, - {} }; static int __init register_powersave_nap_sysctl(void) { - register_sysctl_table(powersave_nap_sysctl_root); + register_sysctl("kernel", powersave_nap_ctl_table); return 0; } diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c index 957abd592075..b7029beed847 100644 --- a/arch/powerpc/kernel/ima_arch.c +++ b/arch/powerpc/kernel/ima_arch.c @@ -23,9 +23,9 @@ bool arch_ima_get_secureboot(void) * is not enabled. */ static const char *const secure_rules[] = { - "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", #ifndef CONFIG_MODULE_SIG - "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=MODULE_CHECK appraise_type=imasig|modsig", #endif NULL }; @@ -49,9 +49,9 @@ static const char *const trusted_rules[] = { static const char *const secure_and_trusted_rules[] = { "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", "measure func=MODULE_CHECK template=ima-modsig", - "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", #ifndef CONFIG_MODULE_SIG - "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=MODULE_CHECK appraise_type=imasig|modsig", #endif NULL }; diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index fc6631a80527..aea6f7e8e9c6 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -3,6 +3,7 @@ #include <linux/context_tracking.h> #include <linux/err.h> #include <linux/compat.h> +#include <linux/rseq.h> #include <linux/sched/debug.h> /* for show_regs */ #include <asm/kup.h> @@ -24,6 +25,10 @@ unsigned long global_dbcr0[NR_CPUS]; #endif +#if defined(CONFIG_PREEMPT_DYNAMIC) +DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool exit_must_hard_disable(void) @@ -33,7 +38,7 @@ static inline bool exit_must_hard_disable(void) #else static inline bool exit_must_hard_disable(void) { - return true; + return false; } #endif @@ -50,16 +55,18 @@ static inline bool exit_must_hard_disable(void) */ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) { + bool must_hard_disable = (exit_must_hard_disable() || !restartable); + /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); - if (exit_must_hard_disable() || !restartable) + if (must_hard_disable) __hard_EE_RI_disable(); #ifdef CONFIG_PPC64 /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable() || !restartable) { + if (must_hard_disable) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } @@ -93,7 +100,7 @@ static notrace void booke_load_dbcr0(void) #endif } -static void check_return_regs_valid(struct pt_regs *regs) +static notrace void check_return_regs_valid(struct pt_regs *regs) { #ifdef CONFIG_PPC_BOOK3S_64 unsigned long trap, srr0, srr1; @@ -123,7 +130,7 @@ static void check_return_regs_valid(struct pt_regs *regs) case 0x1600: case 0x1800: validp = &local_paca->hsrr_valid; - if (!*validp) + if (!READ_ONCE(*validp)) return; srr0 = mfspr(SPRN_HSRR0); @@ -133,7 +140,7 @@ static void check_return_regs_valid(struct pt_regs *regs) break; default: validp = &local_paca->srr_valid; - if (!*validp) + if (!READ_ONCE(*validp)) return; srr0 = mfspr(SPRN_SRR0); @@ -159,19 +166,17 @@ static void check_return_regs_valid(struct pt_regs *regs) * such things will get caught most of the time, statistically * enough to be able to get a warning out. */ - barrier(); - - if (!*validp) + if (!READ_ONCE(*validp)) return; - if (!warned) { - warned = true; + if (!data_race(warned)) { + data_race(warned = true); printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); show_regs(regs); } - *validp = 0; /* fixup */ + WRITE_ONCE(*validp, 0); /* fixup */ #endif } @@ -184,7 +189,7 @@ again: ti_flags = read_thread_flags(); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); - if (ti_flags & _TIF_NEED_RESCHED) { + if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { schedule(); } else { /* @@ -265,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); kuap_assert_locked(); @@ -343,7 +348,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(arch_irq_disabled_regs(regs)); - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -366,7 +371,6 @@ void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) { - unsigned long flags; unsigned long ret = 0; unsigned long kuap; bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE; @@ -386,17 +390,17 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) && TRAP(regs) != INTERRUPT_PROGRAM && TRAP(regs) != INTERRUPT_PERFMON) - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); kuap = kuap_get_and_assert_locked(); - local_irq_save(flags); + local_irq_disable(); if (!arch_irq_disabled_regs(regs)) { /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPT)) { + if (need_irq_preemption()) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index fccc34489add..1ad059a9e2fe 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr @@ -101,12 +101,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * state of kernel code. */ SANITIZE_SYSCALL_GPRS() - bl system_call_exception + bl CFUNC(system_call_exception) .Lsyscall_vectored_\name\()_exit: addi r4,r1,STACK_INT_FRAME_REGS li r5,1 /* scv */ - bl syscall_exit_prepare + bl CFUNC(syscall_exit_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ .Lsyscall_vectored_\name\()_rst_start: lbz r11,PACAIRQHAPPENED(r13) @@ -185,7 +185,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl syscall_exit_restart + bl CFUNC(syscall_exit_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_vectored_\name\()_rst_start 1: @@ -286,12 +286,12 @@ END_BTB_FLUSH_SECTION * state of kernel code. */ SANITIZE_SYSCALL_GPRS() - bl system_call_exception + bl CFUNC(system_call_exception) .Lsyscall_exit: addi r4,r1,STACK_INT_FRAME_REGS li r5,0 /* !scv */ - bl syscall_exit_prepare + bl CFUNC(syscall_exit_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Lsyscall_rst_start: @@ -372,7 +372,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart) addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl syscall_exit_restart + bl CFUNC(syscall_exit_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_rst_start 1: @@ -401,7 +401,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr addi r3,r1,STACK_INT_FRAME_REGS - bl unrecoverable_exception + bl CFUNC(unrecoverable_exception) b . /* should not get here */ #else bne .Lfast_user_interrupt_return_srr @@ -419,7 +419,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) addi r3,r1,STACK_INT_FRAME_REGS - bl interrupt_exit_user_prepare + bl CFUNC(interrupt_exit_user_prepare) #ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS cmpdi r3,0 bne- .Lrestore_nvgprs_\srr @@ -523,7 +523,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl interrupt_exit_user_restart + bl CFUNC(interrupt_exit_user_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_user_rst_start 1: @@ -536,7 +536,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr interrupt_return_\srr\()_kernel: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) addi r3,r1,STACK_INT_FRAME_REGS - bl interrupt_exit_kernel_prepare + bl CFUNC(interrupt_exit_kernel_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ .Linterrupt_return_\srr\()_kernel_rst_start: @@ -705,7 +705,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl interrupt_exit_kernel_restart + bl CFUNC(interrupt_exit_kernel_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_kernel_rst_start 1: @@ -727,21 +727,20 @@ DEFINE_FIXED_SYMBOL(__end_soft_masked, text) #ifdef CONFIG_PPC_BOOK3S _GLOBAL(ret_from_fork_scv) - bl schedule_tail - REST_NVGPRS(r1) + bl CFUNC(schedule_tail) + HANDLER_RESTORE_NVGPRS() li r3,0 /* fork() return value */ b .Lsyscall_vectored_common_exit #endif _GLOBAL(ret_from_fork) - bl schedule_tail - REST_NVGPRS(r1) + bl CFUNC(schedule_tail) + HANDLER_RESTORE_NVGPRS() li r3,0 /* fork() return value */ b .Lsyscall_exit -_GLOBAL(ret_from_kernel_thread) - bl schedule_tail - REST_NVGPRS(r1) +_GLOBAL(ret_from_kernel_user_thread) + bl CFUNC(schedule_tail) mtctr r14 mr r3,r15 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -749,4 +748,25 @@ _GLOBAL(ret_from_kernel_thread) #endif bctrl li r3,0 + /* + * It does not matter whether this returns via the scv or sc path + * because it returns as execve() and therefore has no calling ABI + * (i.e., it sets registers according to the exec()ed entry point). + */ b .Lsyscall_exit + +_GLOBAL(start_kernel_thread) + bl CFUNC(schedule_tail) + mtctr r14 + mr r3,r15 +#ifdef CONFIG_PPC64_ELF_ABI_V2 + mr r12,r14 +#endif + bctrl + /* + * This must not return. We actually want to BUG here, not WARN, + * because BUG will exit the process which is what the kernel thread + * should have done, which may give some hope of continuing. + */ +100: trap + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c deleted file mode 100644 index c877f074d174..000000000000 --- a/arch/powerpc/kernel/io-workarounds.c +++ /dev/null @@ -1,197 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Support PCI IO workaround - * - * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * IBM, Corp. - * (C) Copyright 2007-2008 TOSHIBA CORPORATION - */ -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/sched/mm.h> /* for init_mm */ -#include <linux/pgtable.h> - -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/io-workarounds.h> -#include <asm/pte-walk.h> - - -#define IOWA_MAX_BUS 8 - -static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; -static unsigned int iowa_bus_count; - -static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) -{ - int i, j; - struct resource *res; - unsigned long vstart, vend; - - for (i = 0; i < iowa_bus_count; i++) { - struct iowa_bus *bus = &iowa_busses[i]; - struct pci_controller *phb = bus->phb; - - if (vaddr) { - vstart = (unsigned long)phb->io_base_virt; - vend = vstart + phb->pci_io_size - 1; - if ((vaddr >= vstart) && (vaddr <= vend)) - return bus; - } - - if (paddr) - for (j = 0; j < 3; j++) { - res = &phb->mem_resources[j]; - if (paddr >= res->start && paddr <= res->end) - return bus; - } - } - - return NULL; -} - -#ifdef CONFIG_PPC_INDIRECT_MMIO -struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) -{ - struct iowa_bus *bus; - int token; - - token = PCI_GET_ADDR_TOKEN(addr); - - if (token && token <= iowa_bus_count) - bus = &iowa_busses[token - 1]; - else { - unsigned long vaddr, paddr; - - vaddr = (unsigned long)PCI_FIX_ADDR(addr); - if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) - return NULL; - - paddr = ppc_find_vmap_phys(vaddr); - - bus = iowa_pci_find(vaddr, paddr); - - if (bus == NULL) - return NULL; - } - - return bus; -} -#else /* CONFIG_PPC_INDIRECT_MMIO */ -struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) -{ - return NULL; -} -#endif /* !CONFIG_PPC_INDIRECT_MMIO */ - -#ifdef CONFIG_PPC_INDIRECT_PIO -struct iowa_bus *iowa_pio_find_bus(unsigned long port) -{ - unsigned long vaddr = (unsigned long)pci_io_base + port; - return iowa_pci_find(vaddr, 0); -} -#else -struct iowa_bus *iowa_pio_find_bus(unsigned long port) -{ - return NULL; -} -#endif - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ -static ret iowa_##name at \ -{ \ - struct iowa_bus *bus; \ - bus = iowa_##space##_find_bus(aa); \ - if (bus && bus->ops && bus->ops->name) \ - return bus->ops->name al; \ - return __do_##name al; \ -} - -#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ -static void iowa_##name at \ -{ \ - struct iowa_bus *bus; \ - bus = iowa_##space##_find_bus(aa); \ - if (bus && bus->ops && bus->ops->name) { \ - bus->ops->name al; \ - return; \ - } \ - __do_##name al; \ -} - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -static const struct ppc_pci_io iowa_pci_io = { - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, -#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -}; - -#ifdef CONFIG_PPC_INDIRECT_MMIO -void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, - pgprot_t prot, void *caller) -{ - struct iowa_bus *bus; - void __iomem *res = __ioremap_caller(addr, size, prot, caller); - int busno; - - bus = iowa_pci_find(0, (unsigned long)addr); - if (bus != NULL) { - busno = bus - iowa_busses; - PCI_SET_ADDR_TOKEN(res, busno + 1); - } - return res; -} -#endif /* !CONFIG_PPC_INDIRECT_MMIO */ - -bool io_workaround_inited; - -/* Enable IO workaround */ -static void io_workaround_init(void) -{ - if (io_workaround_inited) - return; - ppc_pci_io = iowa_pci_io; - io_workaround_inited = true; -} - -/* Register new bus to support workaround */ -void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops, - int (*initfunc)(struct iowa_bus *, void *), void *data) -{ - struct iowa_bus *bus; - struct device_node *np = phb->dn; - - io_workaround_init(); - - if (iowa_bus_count >= IOWA_MAX_BUS) { - pr_err("IOWA:Too many pci bridges, " - "workarounds disabled for %pOF\n", np); - return; - } - - bus = &iowa_busses[iowa_bus_count]; - bus->phb = phb; - bus->ops = ops; - bus->private = data; - - if (initfunc) - if ((*initfunc)(bus, data)) - return; - - iowa_bus_count++; - - pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np); -} - diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 2f29b7d432de..bcc201c01514 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -31,13 +31,14 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count) if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - tmp = *port; + tmp = *(const volatile u8 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } EXPORT_SYMBOL(_insb); @@ -47,75 +48,80 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count) if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - *port = *tbuf++; + *(volatile u8 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } EXPORT_SYMBOL(_outsb); -void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) +void _insw(const volatile u16 __iomem *port, void *buf, long count) { u16 *tbuf = buf; u16 tmp; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - tmp = *port; + tmp = *(const volatile u16 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } -EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_insw); -void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) +void _outsw(volatile u16 __iomem *port, const void *buf, long count) { const u16 *tbuf = buf; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - *port = *tbuf++; + *(volatile u16 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } -EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_outsw); -void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) +void _insl(const volatile u32 __iomem *port, void *buf, long count) { u32 *tbuf = buf; u32 tmp; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - tmp = *port; + tmp = *(const volatile u32 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } -EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_insl); -void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) +void _outsl(volatile u32 __iomem *port, const void *buf, long count) { const u32 *tbuf = buf; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - *port = *tbuf++; + *(volatile u32 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } -EXPORT_SYMBOL(_outsl_ns); +EXPORT_SYMBOL(_outsl); #define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) @@ -127,7 +133,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n) lc |= lc << 8; lc |= lc << 16; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && !IO_CHECK_ALIGN(p, 4)) { *((volatile u8 *)p) = c; p++; @@ -143,7 +149,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n) p++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memset_io); @@ -152,7 +158,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, { void *vsrc = (void __force *) src; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) { *((u8 *)dest) = *((volatile u8 *)vsrc); eieio(); @@ -174,7 +180,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, dest++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memcpy_fromio); @@ -182,7 +188,7 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) { void *vdest = (void __force *) dest; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) { *((volatile u8 *)vdest) = *((u8 *)src); src++; @@ -201,6 +207,6 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) vdest++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memcpy_toio); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index caebe1431596..0ce71310b7d9 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/dma-mapping.h> #include <linux/bitmap.h> #include <linux/iommu-helper.h> @@ -26,6 +27,7 @@ #include <linux/iommu.h> #include <linux/sched.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> @@ -35,6 +37,7 @@ #include <asm/vio.h> #include <asm/tce.h> #include <asm/mmu_context.h> +#include <asm/ppc-pci.h> #define DBG(...) @@ -67,11 +70,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl) static void iommu_debugfs_del(struct iommu_table *tbl) { char name[10]; - struct dentry *liobn_entry; sprintf(name, "%08lx", tbl->it_index); - liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - debugfs_remove(liobn_entry); + debugfs_lookup_and_remove(name, iommu_debugfs_dir); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} @@ -173,17 +174,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb, return 0; } -static struct notifier_block fail_iommu_bus_notifier = { +/* + * PCI and VIO buses need separate notifier_block structs, since they're linked + * list nodes. Sharing a notifier_block would mean that any notifiers later + * registered for PCI buses would also get called by VIO buses and vice versa. + */ +static struct notifier_block fail_iommu_pci_bus_notifier = { + .notifier_call = fail_iommu_bus_notify +}; + +#ifdef CONFIG_IBMVIO +static struct notifier_block fail_iommu_vio_bus_notifier = { .notifier_call = fail_iommu_bus_notify }; +#endif static int __init fail_iommu_setup(void) { #ifdef CONFIG_PCI - bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier); #endif #ifdef CONFIG_IBMVIO - bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier); #endif return 0; @@ -519,7 +531,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; dma_addr = entry << tbl->it_page_shift; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); + dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); @@ -632,7 +644,7 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, tbl->it_ops->flush(tbl); } -static void iommu_table_clear(struct iommu_table *tbl) +void iommu_table_clear(struct iommu_table *tbl) { /* * In case of firmware assisted dump system goes through clean @@ -673,10 +685,10 @@ static void iommu_table_clear(struct iommu_table *tbl) #endif } -static void iommu_table_reserve_pages(struct iommu_table *tbl, +void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { - int i; + unsigned long i; WARN_ON_ONCE(res_end < res_start); /* @@ -758,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, iommu_table_clear(tbl); if (!welcomed) { - printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", - novmerge ? "disabled" : "enabled"); + pr_info("IOMMU table initialized, virtual merging %s\n", + str_disabled_enabled(novmerge)); welcomed = 1; } @@ -836,12 +848,12 @@ EXPORT_SYMBOL_GPL(iommu_tce_table_put); /* Creates TCEs for a user provided buffer. The user buffer must be * contiguous real kernel storage (not vmalloc). The address passed here - * comprises a page address and offset into that page. The dma_addr_t - * returned will point to the same byte within the page as was passed in. + * is physical address into that page. The dma_addr_t returned will point + * to the same byte within the page as was passed in. */ -dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, - struct page *page, unsigned long offset, size_t size, - unsigned long mask, enum dma_data_direction direction, +dma_addr_t iommu_map_phys(struct device *dev, struct iommu_table *tbl, + phys_addr_t phys, size_t size, unsigned long mask, + enum dma_data_direction direction, unsigned long attrs) { dma_addr_t dma_handle = DMA_MAPPING_ERROR; @@ -851,7 +863,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, BUG_ON(direction == DMA_NONE); - vaddr = page_address(page) + offset; + vaddr = phys_to_virt(phys); uaddr = (unsigned long)vaddr; if (tbl) { @@ -878,7 +890,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, return dma_handle; } -void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, +void iommu_unmap_phys(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, unsigned long attrs) { @@ -906,6 +918,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + int tcesize = (1 << tbl->it_page_shift); size = PAGE_ALIGN(size); order = get_order(size); @@ -932,7 +945,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); @@ -940,7 +954,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, free_pages((unsigned long)ret, order); return NULL; } - *dma_handle = mapping; + + *dma_handle = mapping | ((u64)ret & (tcesize - 1)); return ret; } @@ -951,7 +966,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -974,6 +989,23 @@ unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); #ifdef CONFIG_IOMMU_API + +int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + + if (!dev) + return 0; + + if (device_iommu_mapped(dev)) { + *ppdev = pdev; + return 1; + } + + return 0; +} + /* * SPAPR TCE API */ @@ -1061,10 +1093,10 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) } EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); -extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, - struct iommu_table *tbl, - unsigned long entry, unsigned long *hpa, - enum dma_data_direction *direction) +long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) { long ret; unsigned long size = 0; @@ -1088,59 +1120,6 @@ void iommu_tce_kill(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_kill); -int iommu_take_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - int ret = 0; - - /* - * VFIO does not control TCE entries allocation and the guest - * can write new TCEs on top of existing ones so iommu_tce_build() - * must be able to release old pages. This functionality - * requires exchange() callback defined so if it is not - * implemented, we disallow taking ownership over the table. - */ - if (!tbl->it_ops->xchg_no_kill) - return -EINVAL; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - if (iommu_table_in_use(tbl)) { - pr_err("iommu_tce: it_map is not empty"); - ret = -EBUSY; - } else { - memset(tbl->it_map, 0xff, sz); - } - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_take_ownership); - -void iommu_release_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - memset(tbl->it_map, 0, sz); - - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); -} -EXPORT_SYMBOL_GPL(iommu_release_ownership); - int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) { /* @@ -1160,25 +1139,180 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), iommu_group_id(table_group->group)); - - return iommu_group_add_device(table_group->group, dev); + /* + * This is still not adding devices via the IOMMU bus notifier because + * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls + * pcibios_scan_phb() first (and this guy adds devices and triggers + * the notifier) and only then it calls pci_bus_add_devices() which + * configures DMA for buses which also creates PEs and IOMMU groups. + */ + return iommu_probe_device(dev); } EXPORT_SYMBOL_GPL(iommu_add_device); -void iommu_del_device(struct device *dev) +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. + */ +static int +spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev, + struct iommu_domain *old) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_table_group *table_group; + struct iommu_group *grp; + + /* At first attach the ownership is already set */ + if (!domain) + return 0; + + grp = iommu_group_get(dev); + table_group = iommu_group_get_iommudata(grp); /* - * Some devices might not have IOMMU table and group - * and we needn't detach them from the associated - * IOMMU groups + * The domain being set to PLATFORM from earlier + * BLOCKED. The table_group ownership has to be released. */ - if (!device_iommu_mapped(dev)) { - pr_debug("iommu_tce: skipping device %s with no tbl\n", - dev_name(dev)); - return; + table_group->ops->release_ownership(table_group, dev); + iommu_group_put(grp); + + return 0; +} + +static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { + .attach_dev = spapr_tce_platform_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &spapr_tce_platform_domain_ops, +}; + +static int +spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev, struct iommu_domain *old) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + + /* + * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain + * also sets the dma_api ops + */ + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group, dev); + iommu_group_put(grp); + + return ret; +} + +static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = { + .attach_dev = spapr_tce_blocked_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &spapr_tce_blocked_domain_ops, +}; + +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + break; } - iommu_group_remove_device(dev); + return false; +} + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ + struct pci_dev *pdev; + struct pci_controller *hose; + + if (!dev_is_pci(dev)) + return ERR_PTR(-ENODEV); + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + return &hose->iommu; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ + struct pci_controller *hose; + struct pci_dev *pdev; + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + if (!hose->controller_ops.device_group) + return ERR_PTR(-ENOENT); + + return hose->controller_ops.device_group(hose, pdev); } -EXPORT_SYMBOL_GPL(iommu_del_device); + +static const struct iommu_ops spapr_tce_iommu_ops = { + .default_domain = &spapr_tce_platform_domain, + .blocked_domain = &spapr_tce_blocked_domain, + .capable = spapr_tce_iommu_capable, + .probe_device = spapr_tce_iommu_probe_device, + .release_device = spapr_tce_iommu_release_device, + .device_group = spapr_tce_iommu_device_group, +}; + +static struct attribute *spapr_tce_iommu_attrs[] = { + NULL, +}; + +static struct attribute_group spapr_tce_iommu_group = { + .name = "spapr-tce-iommu", + .attrs = spapr_tce_iommu_attrs, +}; + +static const struct attribute_group *spapr_tce_iommu_groups[] = { + &spapr_tce_iommu_group, + NULL, +}; + +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + +/* + * This registers IOMMU devices of PHBs. This needs to happen + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and + * before subsys_initcall(iommu_subsys_init). + */ +static int __init spapr_tce_setup_phb_iommus_initcall(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + ppc_iommu_register_device(hose); + } + return 0; +} +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); +#endif + #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c5b9ce887483..a0e8b998c9b5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,69 +89,69 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized) { - seq_printf(p, "%*s: ", prec, "TAU"); + seq_printf(p, "%*s:", prec, "TAU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); + seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10); seq_printf(p, " Local timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "BCT"); + seq_printf(p, "%*s:", prec, "BCT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10); seq_printf(p, " Broadcast timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10); seq_printf(p, " Local timer interrupts for others\n"); - seq_printf(p, "%*s: ", prec, "SPU"); + seq_printf(p, "%*s:", prec, "SPU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); + seq_printf(p, "%*s:", prec, "PMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "MCE"); + seq_printf(p, "%*s:", prec, "MCE"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10); seq_printf(p, " Machine check exceptions\n"); #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { - seq_printf(p, "%*s: ", prec, "HMI"); + seq_printf(p, "%*s:", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); + seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } #endif - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10); seq_printf(p, " System Reset interrupts\n"); #ifdef CONFIG_PPC_WATCHDOG - seq_printf(p, "%*s: ", prec, "WDG"); + seq_printf(p, "%*s:", prec, "WDG"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10); seq_printf(p, " Watchdog soft-NMI interrupts\n"); #endif #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { - seq_printf(p, "%*s: ", prec, "DBL"); + seq_printf(p, "%*s:", prec, "DBL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10); seq_printf(p, " Doorbell interrupts\n"); } #endif @@ -206,7 +206,11 @@ static __always_inline void call_do_softirq(const void *sp) asm volatile ( PPC_STLU " %%r1, %[offset](%[sp]) ;" "mr %%r1, %[sp] ;" +#ifdef CONFIG_PPC_KERNEL_PCREL + "bl %[callee]@notoc ;" +#else "bl %[callee] ;" +#endif PPC_LL " %%r1, 0(%%r1) ;" : // Outputs : // Inputs @@ -238,7 +242,7 @@ static void __do_irq(struct pt_regs *regs, unsigned long oldsp) irq = static_call(ppc_get_irq)(); /* We can hard enable interrupts now to allow perf interrupts */ - if (should_hard_irq_enable()) + if (should_hard_irq_enable(regs)) do_hard_irq_enable(); /* And finally process it */ @@ -259,7 +263,11 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) PPC_STLU " %%r1, %[offset](%[sp]) ;" "mr %%r4, %%r1 ;" "mr %%r1, %[sp] ;" +#ifdef CONFIG_PPC_KERNEL_PCREL + "bl %[callee]@notoc ;" +#else "bl %[callee] ;" +#endif PPC_LL " %%r1, 0(%%r1) ;" : // Outputs "+r" (r3) @@ -276,15 +284,14 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - void *cursp, *irqsp, *sirqsp; + void *cursp, *irqsp; /* Switch to the irq stack to handle this */ cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); irqsp = hardirq_ctx[raw_smp_processor_id()]; - sirqsp = softirq_ctx[raw_smp_processor_id()]; /* Already there ? If not switch stack and call */ - if (unlikely(cursp == irqsp || cursp == sirqsp)) + if (unlikely(cursp == irqsp)) __do_irq(regs, current_stack_pointer); else call_do_irq(regs, irqsp); @@ -326,7 +333,7 @@ void __init init_IRQ(void) static_call_update(ppc_get_irq, ppc_md.get_irq); } -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void *critirq_ctx[NR_CPUS] __read_mostly; void *dbgirq_ctx[NR_CPUS] __read_mostly; void *mcheckirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index eb2b380e52a0..d5c48d1b0a31 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -70,22 +70,19 @@ int distribute_irqs = 1; static inline void next_interrupt(struct pt_regs *regs) { - /* - * Softirq processing can enable/disable irqs, which will leave - * MSR[EE] enabled and the soft mask set to IRQS_DISABLED. Fix - * this up. - */ - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - else - irq_soft_mask_set(IRQS_ALL_DISABLED); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + } /* * We are responding to the next interrupt, so interrupt-off * latencies should be reset here. */ + lockdep_hardirq_exit(); trace_hardirqs_on(); trace_hardirqs_off(); + lockdep_hardirq_enter(); } static inline bool irq_happened_test_and_clear(u8 irq) @@ -97,22 +94,11 @@ static inline bool irq_happened_test_and_clear(u8 irq) return false; } -void replay_soft_interrupts(void) +static __no_kcsan void __replay_soft_interrupts(void) { struct pt_regs regs; /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * Softirqs can not be disabled over replay to stop this recursion - * because interrupts taken in idle code may require RCU softirq - * to run in the irq RCU tracking context. This is a hard problem - * to fix without changes to the softirq or idle layer. - * * We use local_paca rather than get_paca() to avoid all the * debug_smp_processor_id() business in this low level function. */ @@ -120,13 +106,20 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { WARN_ON_ONCE(mfmsr() & MSR_EE); WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING); } + /* + * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling + * MSR[EE] to get PMIs, which can result in more IRQs becoming + * pending. + */ + local_paca->irq_happened |= PACA_IRQ_REPLAYING; + ppc_save_regs(®s); regs.softe = IRQS_ENABLED; regs.msr |= MSR_EE; -again: /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -175,17 +168,18 @@ again: next_interrupt(®s); } - /* - * Softirq processing can enable and disable interrupts, which can - * result in new irqs becoming pending. Must keep looping until we - * have cleared out all pending interrupts. - */ - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) - goto again; + local_paca->irq_happened &= ~PACA_IRQ_REPLAYING; +} + +__no_kcsan void replay_soft_interrupts(void) +{ + irq_enter(); /* See comment in arch_local_irq_restore */ + __replay_soft_interrupts(); + irq_exit(); } #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) +static inline __no_kcsan void replay_soft_interrupts_irqrestore(void) { unsigned long kuap_state = get_kuap(); @@ -200,16 +194,16 @@ static inline void replay_soft_interrupts_irqrestore(void) if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); - replay_soft_interrupts(); + __replay_soft_interrupts(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(kuap_state); } #else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts() #endif -notrace void arch_local_irq_restore(unsigned long mask) +notrace __no_kcsan void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -219,9 +213,13 @@ notrace void arch_local_irq_restore(unsigned long mask) return; } - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON_ONCE(in_nmi()); + WARN_ON_ONCE(in_hardirq()); + WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING); + } +again: /* * After the stb, interrupts are unmasked and there are no interrupts * pending replay. The restart sequence makes this atomic with @@ -232,7 +230,7 @@ notrace void arch_local_irq_restore(unsigned long mask) * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" @@ -248,6 +246,12 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + /* + * If we came here from the replay below, we might have a preempt + * pending (due to preempt_enable_no_resched()). Have to check now. + */ + preempt_check_resched(); + return; happened: @@ -261,6 +265,7 @@ happened: irq_soft_mask_set(IRQS_ENABLED); local_paca->irq_happened = 0; __hard_irq_enable(); + preempt_check_resched(); return; } @@ -296,12 +301,38 @@ happened: irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); + /* + * Now enter interrupt context. The interrupt handlers themselves + * also call irq_enter/exit (which is okay, they can nest). But call + * it here now to hold off softirqs until the below irq_exit(). If + * we allowed replayed handlers to run softirqs, that enables irqs, + * which must replay interrupts, which recurses in here and makes + * things more complicated. The recursion is limited to 2, and it can + * be made to work, but it's complicated. + * + * local_bh_disable can not be used here because interrupts taken in + * idle are not in the right context (RCU, tick, etc) to run softirqs + * so irq_enter must be called. + */ + irq_enter(); + replay_soft_interrupts_irqrestore(); + irq_exit(); + + if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) { + /* + * The softirq processing in irq_exit() may enable interrupts + * temporarily, which can result in MSR[EE] being enabled and + * more irqs becoming pending. Go around again if that happens. + */ + trace_hardirqs_on(); + preempt_enable_no_resched(); + goto again; + } + trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS); local_paca->irq_happened = 0; __hard_irq_enable(); preempt_enable(); @@ -317,13 +348,12 @@ EXPORT_SYMBOL(arch_local_irq_restore); * already the case when ppc_md.power_save is called). The function * will return whether to enter power save or just return. * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard + * In the former case, it will have generally sanitized the lazy irq + * state, and in the latter case it will leave with interrupts hard * disabled and marked as such, so the local_irq_enable() call * in arch_cpu_idle() will properly re-enable everything. */ -bool prep_irq_for_idle(void) +__cpuidle bool prep_irq_for_idle(void) { /* * First we need to hard disable to ensure no interrupt @@ -339,9 +369,6 @@ bool prep_irq_for_idle(void) if (lazy_irq_pending()) return false; - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - /* * Mark interrupts as soft-enabled and clear the * PACA_IRQ_HARD_DIS from the pending mask since we diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index dc746611ebc0..5c064485197a 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -46,8 +46,8 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size) WARN_ON_ONCE(size & ~PAGE_MASK); if (slab_is_available()) { - if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, - pgprot_noncached(PAGE_KERNEL))) + if (vmap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, + pgprot_noncached(PAGE_KERNEL))) vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size); } else { early_ioremap_range(ISA_IO_BASE, pa, size, @@ -55,80 +55,50 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size) } } -static void pci_process_ISA_OF_ranges(struct device_node *isa_node, - unsigned long phb_io_base_phys) +static int process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys) { - /* We should get some saner parsing here and remove these structs */ - struct pci_address { - u32 a_hi; - u32 a_mid; - u32 a_lo; - }; - - struct isa_address { - u32 a_hi; - u32 a_lo; - }; - - struct isa_range { - struct isa_address isa_addr; - struct pci_address pci_addr; - unsigned int size; - }; - - const struct isa_range *range; - unsigned long pci_addr; - unsigned int isa_addr; unsigned int size; - int rlen = 0; + struct of_range_parser parser; + struct of_range range; - range = of_get_property(isa_node, "ranges", &rlen); - if (range == NULL || (rlen < sizeof(struct isa_range))) + if (of_range_parser_init(&parser, isa_node)) goto inval_range; - /* From "ISA Binding to 1275" - * The ranges property is laid out as an array of elements, - * each of which comprises: - * cells 0 - 1: an ISA address - * cells 2 - 4: a PCI address - * (size depending on dev->n_addr_cells) - * cell 5: the size of the range - */ - if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) { - range++; - rlen -= sizeof(struct isa_range); - if (rlen < sizeof(struct isa_range)) - goto inval_range; - } - if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) - goto inval_range; + for_each_of_range(&parser, &range) { + if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO) + continue; - isa_addr = range->isa_addr.a_lo; - pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | - range->pci_addr.a_lo; + if (range.cpu_addr == OF_BAD_ADDR) { + pr_err("ISA: Bad CPU mapping: %s\n", __func__); + return -EINVAL; + } - /* Assume these are both zero. Note: We could fix that and - * do a proper parsing instead ... oh well, that will do for - * now as nobody uses fancy mappings for ISA bridges - */ - if ((pci_addr != 0) || (isa_addr != 0)) { - printk(KERN_ERR "unexpected isa to pci mapping: %s\n", - __func__); - return; - } + /* We need page alignment */ + if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) { + pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node); + return -EINVAL; + } - /* Align size and make sure it's cropped to 64K */ - size = PAGE_ALIGN(range->size); - if (size > 0x10000) - size = 0x10000; + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(range.size); + if (size > 0x10000) + size = 0x10000; - remap_isa_base(phb_io_base_phys, size); - return; + if (!phb_io_base_phys) + phb_io_base_phys = range.cpu_addr; + + remap_isa_base(phb_io_base_phys, size); + return 0; + } inval_range: - printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " - "mapping 64k\n"); - remap_isa_base(phb_io_base_phys, 0x10000); + if (phb_io_base_phys) { + pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n"); + remap_isa_base(phb_io_base_phys, 0x10000); + return 0; + } + return -EINVAL; } @@ -170,7 +140,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose) isa_bridge_devnode = np; /* Now parse the "ranges" property and setup the ISA mapping */ - pci_process_ISA_OF_ranges(np, hose->io_base_phys); + process_ISA_OF_ranges(np, hose->io_base_phys); /* Set the global ISA io base to indicate we have an ISA bridge */ isa_io_base = ISA_IO_BASE; @@ -186,75 +156,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose) */ void __init isa_bridge_init_non_pci(struct device_node *np) { - const __be32 *ranges, *pbasep = NULL; - int rlen, i, rs; - u32 na, ns, pna; - u64 cbase, pbase, size = 0; + int ret; /* If we already have an ISA bridge, bail off */ if (isa_bridge_devnode != NULL) return; - pna = of_n_addr_cells(np); - if (of_property_read_u32(np, "#address-cells", &na) || - of_property_read_u32(np, "#size-cells", &ns)) { - pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n", - np); - return; - } - - /* Check it's a supported address format */ - if (na != 2 || ns != 1) { - pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n", - np); - return; - } - rs = na + ns + pna; - - /* Grab the ranges property */ - ranges = of_get_property(np, "ranges", &rlen); - if (ranges == NULL || rlen < rs) { - pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n", - np); - return; - } - - /* Parse it. We are only looking for IO space */ - for (i = 0; (i + rs - 1) < rlen; i += rs) { - if (be32_to_cpup(ranges + i) != 1) - continue; - cbase = be32_to_cpup(ranges + i + 1); - size = of_read_number(ranges + i + na + pna, ns); - pbasep = ranges + i + na; - break; - } - - /* Got something ? */ - if (!size || !pbasep) { - pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n", - np); + ret = process_ISA_OF_ranges(np, 0); + if (ret) return; - } - - /* Align size and make sure it's cropped to 64K */ - size = PAGE_ALIGN(size); - if (size > 0x10000) - size = 0x10000; - - /* Map pbase */ - pbase = of_translate_address(np, pbasep); - if (pbase == OF_BAD_ADDR) { - pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n", - np); - return; - } - - /* We need page alignment */ - if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { - pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n", - np); - return; - } /* Got it */ isa_bridge_devnode = np; @@ -263,7 +173,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np) * and map it */ isa_io_base = ISA_IO_BASE; - remap_isa_base(pbase, size); pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } @@ -282,7 +191,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev, isa_bridge_pcidev = pdev; /* Now parse the "ranges" property and setup the ISA mapping */ - pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); + process_ISA_OF_ranges(devnode, hose->io_base_phys); /* Set the global ISA io base to indicate we have an ISA bridge */ isa_io_base = ISA_IO_BASE; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 5277cf582c16..2659e1ac8604 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -5,7 +5,7 @@ #include <linux/kernel.h> #include <linux/jump_label.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index ebe4d1645ca1..5081334b7bd2 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -21,7 +21,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/slab.h> #include <asm/inst.h> @@ -45,7 +45,7 @@ static struct hard_trap_info { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ #if defined(CONFIG_PPC_85xx) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ @@ -64,7 +64,7 @@ static struct hard_trap_info { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ #endif -#else /* !CONFIG_BOOKE_OR_40x */ +#else /* !CONFIG_BOOKE */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ #if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 072ebe7f290b..f8208c027148 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct pt_regs *regs; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index b20ee72e873a..c0d9f12cb441 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,9 +19,9 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> -#include <linux/moduleloader.h> #include <linux/set_memory.h> -#include <asm/code-patching.h> +#include <linux/execmem.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> @@ -105,41 +105,25 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } -void *alloc_insn_page(void) -{ - void *page; - - page = module_alloc(PAGE_SIZE); - if (!page) - return NULL; - - if (strict_module_rwx_enabled()) - set_memory_rox((unsigned long)page, 1); - - return page; -} - int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; @@ -242,16 +226,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->link; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->link = (unsigned long)__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; @@ -409,49 +383,6 @@ no_kprobe: NOKPROBE_SYMBOL(kprobe_handler); /* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -asm(".global __kretprobe_trampoline\n" - ".type __kretprobe_trampoline, @function\n" - "__kretprobe_trampoline:\n" - "nop\n" - "blr\n" - ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"); - -/* - * Called when the probe at kretprobe trampoline is hit - */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) -{ - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - /* - * We get here through one of two paths: - * 1. by taking a trap -> kprobe_handler() -> here - * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here - * - * When going back through (1), we need regs->nip to be setup properly - * as it is used to determine the return address from the trap. - * For (2), since nip is not honoured with optprobes, we instead setup - * the link register properly so that the subsequent 'blr' in - * __kretprobe_trampoline jumps back to the right instruction. - * - * For nip, we should set the address to the previous instruction since - * we end up emulating it in kprobe_handler(), which increments the nip - * again. - */ - regs_set_return_ip(regs, orig_ret_address - 4); - regs->link = orig_ret_address; - - return 0; -} -NOKPROBE_SYMBOL(trampoline_probe_handler); - -/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" * instruction. To avoid the SMP problems that can occur when we @@ -553,19 +484,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) } NOKPROBE_SYMBOL(kprobe_fault_handler); -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - -int __init arch_init_kprobes(void) -{ - return register_kprobe(&trampoline_p); -} - int arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return 1; return 0; diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 5b3c093611ba..7209d00a9c25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -632,19 +632,19 @@ static void __init kvm_check_ins(u32 *inst, u32 features) #endif } - switch (inst_no_rt & ~KVM_MASK_RB) { #ifdef CONFIG_PPC_BOOK3S_32 + switch (inst_no_rt & ~KVM_MASK_RB) { case KVM_INST_MTSRIN: if (features & KVM_MAGIC_FEAT_SR) { u32 inst_rb = _inst & KVM_MASK_RB; kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); } break; -#endif } +#endif - switch (_inst) { #ifdef CONFIG_BOOKE + switch (_inst) { case KVM_INST_WRTEEI_0: kvm_patch_ins_wrteei_0(inst); break; @@ -652,8 +652,8 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_WRTEEI_1: kvm_patch_ins_wrtee(inst, 0, 1); break; -#endif } +#endif } extern u32 kvm_template_start[]; diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index f048c424c525..ae1906bfe8a5 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -5,8 +5,8 @@ #include <linux/serial_core.h> #include <linux/console.h> #include <linux/pci.h> +#include <linux/of.h> #include <linux/of_address.h> -#include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/serial_reg.h> #include <asm/io.h> @@ -54,9 +54,10 @@ static int legacy_serial_console = -1; static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_PORT; -static unsigned int tsi_serial_in(struct uart_port *p, int offset) +static u32 tsi_serial_in(struct uart_port *p, unsigned int offset) { - unsigned int tmp; + u32 tmp; + offset = offset << p->regshift; if (offset == UART_IIR) { tmp = readl(p->membase + (UART_IIR & ~3)); @@ -65,7 +66,7 @@ static unsigned int tsi_serial_in(struct uart_port *p, int offset) return readb(p->membase + offset); } -static void tsi_serial_out(struct uart_port *p, int offset, int value) +static void tsi_serial_out(struct uart_port *p, unsigned int offset, u32 value) { offset = offset << p->regshift; if (!((offset == UART_IER) && (value & UART_IER_UUE))) @@ -77,6 +78,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { + struct plat_serial8250_port *legacy_port; + struct legacy_serial_info *legacy_info; const __be32 *clk, *spd, *rs; u32 clock = BASE_BAUD * 16; u32 shift = 0; @@ -110,16 +113,17 @@ static int __init add_legacy_port(struct device_node *np, int want_index, if (index >= legacy_serial_count) legacy_serial_count = index + 1; + legacy_port = &legacy_serial_ports[index]; + legacy_info = &legacy_serial_infos[index]; + /* Check if there is a port who already claimed our slot */ - if (legacy_serial_infos[index].np != NULL) { + if (legacy_info->np != NULL) { /* if we still have some room, move it, else override */ if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) { printk(KERN_DEBUG "Moved legacy port %d -> %d\n", index, legacy_serial_count); - legacy_serial_ports[legacy_serial_count] = - legacy_serial_ports[index]; - legacy_serial_infos[legacy_serial_count] = - legacy_serial_infos[index]; + legacy_serial_ports[legacy_serial_count] = *legacy_port; + legacy_serial_infos[legacy_serial_count] = *legacy_info; legacy_serial_count++; } else { printk(KERN_DEBUG "Replacing legacy port %d\n", index); @@ -127,36 +131,32 @@ static int __init add_legacy_port(struct device_node *np, int want_index, } /* Now fill the entry */ - memset(&legacy_serial_ports[index], 0, - sizeof(struct plat_serial8250_port)); + memset(legacy_port, 0, sizeof(*legacy_port)); if (iotype == UPIO_PORT) - legacy_serial_ports[index].iobase = base; + legacy_port->iobase = base; else - legacy_serial_ports[index].mapbase = base; - - legacy_serial_ports[index].iotype = iotype; - legacy_serial_ports[index].uartclk = clock; - legacy_serial_ports[index].irq = irq; - legacy_serial_ports[index].flags = flags; - legacy_serial_ports[index].regshift = shift; - legacy_serial_infos[index].taddr = taddr; - legacy_serial_infos[index].np = of_node_get(np); - legacy_serial_infos[index].clock = clock; - legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0; - legacy_serial_infos[index].irq_check_parent = irq_check_parent; + legacy_port->mapbase = base; + + legacy_port->iotype = iotype; + legacy_port->uartclk = clock; + legacy_port->irq = irq; + legacy_port->flags = flags; + legacy_port->regshift = shift; + legacy_info->taddr = taddr; + legacy_info->np = of_node_get(np); + legacy_info->clock = clock; + legacy_info->speed = spd ? be32_to_cpup(spd) : 0; + legacy_info->irq_check_parent = irq_check_parent; if (iotype == UPIO_TSI) { - legacy_serial_ports[index].serial_in = tsi_serial_in; - legacy_serial_ports[index].serial_out = tsi_serial_out; + legacy_port->serial_in = tsi_serial_in; + legacy_port->serial_out = tsi_serial_out; } - printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n", - index, np); - printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", + printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n", index, np); + printk(KERN_DEBUG " %s=%pa, taddr=%pa, irq=%lx, clk=%d, speed=%d\n", (iotype == UPIO_PORT) ? "port" : "mem", - (unsigned long long)base, (unsigned long long)taddr, irq, - legacy_serial_ports[index].uartclk, - legacy_serial_infos[index].speed); + &base, &taddr, irq, legacy_port->uartclk, legacy_info->speed); return index; } @@ -171,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np, /* We only support ports that have a clock frequency properly * encoded in the device-tree. */ - if (of_get_property(np, "clock-frequency", NULL) == NULL) + if (!of_property_present(np, "clock-frequency")) return -1; /* if reg-offset don't try to use it */ - if ((of_get_property(np, "reg-offset", NULL) != NULL)) + if (of_property_present(np, "reg-offset")) return -1; /* if rtas uses this device, don't try to use it as well */ - if (of_get_property(np, "used-by-rtas", NULL) != NULL) + if (of_property_read_bool(np, "used-by-rtas")) return -1; /* Get the address */ @@ -237,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np, * Note: Don't even try on P8 lpc, we know it's not directly mapped */ if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || - of_get_property(isa_brg, "ranges", NULL)) { + of_property_present(isa_brg, "ranges")) { taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) taddr = 0; @@ -268,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * compatible UARTs on PCI need all sort of quirks (port offsets * etc...) that this code doesn't know about */ - if (of_get_property(np, "clock-frequency", NULL) == NULL) + if (!of_property_present(np, "clock-frequency")) return -1; /* Get the PCI address. Assume BAR 0 */ @@ -508,12 +508,16 @@ static void __init fixup_port_irq(int index, port->irq = virq; -#ifdef CONFIG_SERIAL_8250_FSL - if (of_device_is_compatible(np, "fsl,ns16550")) { - port->handle_irq = fsl8250_handle_irq; - port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); + if (IS_ENABLED(CONFIG_SERIAL_8250) && + of_device_is_compatible(np, "fsl,ns16550")) { + if (IS_REACHABLE(CONFIG_SERIAL_8250_FSL)) { + port->handle_irq = fsl8250_handle_irq; + port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); + } else { + pr_warn_once("Not activating Freescale specific workaround for device %pOFP\n", + np); + } } -#endif } static void __init fixup_port_pio(int index, diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 6c5d30fba766..219f28637a3e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -131,6 +131,13 @@ void save_mce_event(struct pt_regs *regs, long handled, if (mce->error_type == MCE_ERROR_TYPE_UE) mce->u.ue_error.ignore_event = mce_err->ignore_event; + /* + * Raise irq work, So that we don't miss to log the error for + * unrecoverable errors. + */ + if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED) + mce_irq_work_queue(); + if (!addr) return; @@ -233,9 +240,6 @@ static void machine_check_ue_event(struct machine_check_event *evt) } memcpy(&local_paca->mce_info->mce_ue_event_queue[index], evt, sizeof(*evt)); - - /* Queue work to process this event later. */ - mce_irq_work_queue(); } /* diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index fb7de3543c03..29e1440d14cc 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -10,11 +10,11 @@ * * setjmp/longjmp code by Paul Mackerras. */ +#include <linux/export.h> #include <asm/ppc_asm.h> #include <asm/unistd.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> -#include <asm/export.h> .text diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index daf8f87d2372..acb727f54e9d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -8,6 +8,7 @@ * */ +#include <linux/export.h> #include <linux/sys.h> #include <asm/unistd.h> #include <asm/errno.h> @@ -22,38 +23,11 @@ #include <asm/processor.h> #include <asm/bug.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/feature-fixups.h> .text /* - * This returns the high 64 bits of the product of two 64-bit numbers. - */ -_GLOBAL(mulhdu) - cmpwi r6,0 - cmpwi cr1,r3,0 - mr r10,r4 - mulhwu r4,r4,r5 - beq 1f - mulhwu r0,r10,r6 - mullw r7,r10,r5 - addc r7,r0,r7 - addze r4,r4 -1: beqlr cr1 /* all done if high part of A is 0 */ - mullw r9,r3,r5 - mulhwu r10,r3,r5 - beq 2f - mullw r0,r3,r6 - mulhwu r8,r3,r6 - addc r7,r0,r7 - adde r4,r4,r8 - addze r10,r10 -2: addc r4,r4,r9 - addze r3,r10 - blr - -/* * reloc_got2 runs through the .got2 section adding an offset * to each entry. */ @@ -176,46 +150,6 @@ _GLOBAL(low_choose_7447a_dfs) #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_40x - -/* - * Do an IO access in real mode - */ -_GLOBAL(real_readb) - mfmsr r7 - rlwinm r0,r7,0,~MSR_DR - sync - mtmsr r0 - sync - isync - lbz r3,0(r3) - sync - mtmsr r7 - sync - isync - blr -_ASM_NOKPROBE_SYMBOL(real_readb) - - /* - * Do an IO access in real mode - */ -_GLOBAL(real_writeb) - mfmsr r7 - rlwinm r0,r7,0,~MSR_DR - sync - mtmsr r0 - sync - isync - stb r3,0(r4) - sync - mtmsr r7 - sync - isync - blr -_ASM_NOKPROBE_SYMBOL(real_writeb) - -#endif /* CONFIG_40x */ - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index c39c07a4c06e..a997c7f43dc0 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include <linux/export.h> #include <linux/linkage.h> #include <linux/sys.h> #include <asm/unistd.h> @@ -23,7 +24,6 @@ #include <asm/kexec.h> #include <asm/ptrace.h> #include <asm/mmu.h> -#include <asm/export.h> #include <asm/feature-fixups.h> .text @@ -74,7 +74,7 @@ _GLOBAL(rmci_off) blr #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_PMAC /* * Do an IO access in real mode @@ -137,7 +137,7 @@ _GLOBAL(real_writeb) sync isync blr -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ +#endif // CONFIG_PPC_PMAC #ifdef CONFIG_PPC_PASEMI @@ -174,7 +174,7 @@ _GLOBAL(real_205_writeb) #endif /* CONFIG_PPC_PASEMI */ -#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +#ifdef CONFIG_CPU_FREQ_PMAC64 /* * SCOM access functions for 970 (FX only for now) * @@ -192,7 +192,7 @@ _GLOBAL(scom970_read) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd, * and finally or in RW bit */ @@ -226,7 +226,7 @@ _GLOBAL(scom970_write) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd. */ @@ -243,7 +243,7 @@ _GLOBAL(scom970_write) /* restore interrupts */ mtmsrd r5,1 blr -#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ +#endif // CONFIG_CPU_FREQ_PMAC64 /* kexec_wait(phys_cpu) * @@ -432,7 +432,7 @@ _GLOBAL(kexec_sequence) 1: /* copy dest pages, flush whole dest image */ mr r3,r29 - bl kexec_copy_flush /* (image) */ + bl CFUNC(kexec_copy_flush) /* (image) */ /* turn off mmu now if not done earlier */ cmpdi r26,0 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index f6d6ae0a1692..baeb24c102c8 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -7,7 +7,6 @@ #include <linux/elf.h> #include <linux/moduleloader.h> #include <linux/err.h> -#include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> @@ -17,8 +16,6 @@ #include <asm/setup.h> #include <asm/sections.h> -static LIST_HEAD(module_bug_list); - static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) @@ -88,40 +85,3 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } - -static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) -{ - pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; - gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); - - /* - * Don't do huge page allocations for modules yet until more testing - * is done. STRICT_MODULE_RWX may require extra work to support this - * too. - */ - return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - -void *module_alloc(unsigned long size) -{ -#ifdef MODULES_VADDR - unsigned long limit = (unsigned long)_etext - SZ_32M; - void *ptr = NULL; - - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - - /* First try within 32M limit from _etext to avoid branch trampolines */ - if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END, true); - - if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); - - return ptr; -#else - return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); -#endif -} diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index ea6536171778..f930e3395a7f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,7 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> /* Count how many different relocations (different symbol, different addend) */ @@ -163,8 +163,7 @@ static uint32_t do_plt_call(void *location, pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->core_layout.base - && location < mod->core_layout.base + mod->core_layout.size) + if (within_module_core((unsigned long)location, mod)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -322,14 +321,14 @@ notrace int module_trampoline_target(struct module *mod, unsigned long addr, int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) { - module->arch.tramp = do_plt_call(module->core_layout.base, + module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base, (unsigned long)ftrace_caller, sechdrs, module); if (!module->arch.tramp) return -ENOENT; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - module->arch.tramp_regs = do_plt_call(module->core_layout.base, + module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base, (unsigned long)ftrace_regs_caller, sechdrs, module); if (!module->arch.tramp_regs) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ff045644f13f..2a44bc8e2439 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> @@ -101,32 +101,45 @@ static unsigned long stub_func_addr(func_desc_t func) /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ -struct ppc64_stub_entry -{ - /* 28 byte jump instruction sequence (7 instructions). We only - * need 6 instructions on ABIv2 but we always allocate 7 so - * so we don't have to modify the trampoline load instruction. */ +struct ppc64_stub_entry { + /* + * 28 byte jump instruction sequence (7 instructions) that can + * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned + * with PCREL kernels that use prefix instructions in the stub. + */ u32 jump[7]; /* Used by ftrace to identify stubs */ u32 magic; /* Data for the above code */ func_desc_t funcdata; +} __aligned(8); + +struct ppc64_got_entry { + u64 addr; }; /* * PPC64 uses 24 bit jumps, but we need to jump into other modules or * the kernel which may be further. So we jump to a stub. * - * For ELFv1 we need to use this to set up the new r2 value (aka TOC - * pointer). For ELFv2 it's the callee's responsibility to set up the - * new r2, but for both we need to save the old r2. + * Target address and TOC are loaded from function descriptor in the + * ppc64_stub_entry. * - * We could simply patch the new r2 value and function pointer into - * the stub, but it's significantly shorter to put these values at the - * end of the stub code, and patch the stub address (32-bits relative - * to the TOC ptr, r2) into the stub. + * r12 is used to generate the target address, which is required for the + * ELFv2 global entry point calling convention. + * + * TOC handling: + * - PCREL does not have a TOC. + * - ELFv2 non-PCREL just has to save r2, the callee is responsible for + * setting its own TOC pointer at the global entry address. + * - ELFv1 must load the new TOC pointer from the function descriptor. */ static u32 ppc64_stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL + /* pld r12,addr */ + PPC_PREFIX_8LS | __PPC_PRFX_R(1), + PPC_INST_PLD | ___PPC_RT(_R12), +#else PPC_RAW_ADDIS(_R11, _R2, 0), PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ @@ -136,13 +149,17 @@ static u32 ppc64_stub_insns[] = { /* Set up new r2 from function descriptor */ PPC_RAW_LD(_R2, _R11, 40), #endif +#endif PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR(), }; -/* Count how many different 24-bit relocations (different symbol, - different addend) */ -static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +/* + * Count how many different r_type relocations (different symbol, + * different addend). + */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num, + unsigned long r_type) { unsigned int i, r_info, r_addend, _count_relocs; @@ -151,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) r_info = 0; r_addend = 0; for (i = 0; i < num; i++) - /* Only count 24-bit relocs, others don't need stubs */ - if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 && + /* Only count r_type relocs, others don't need stubs */ + if (ELF64_R_TYPE(rela[i].r_info) == r_type && (r_info != ELF64_R_SYM(rela[i].r_info) || r_addend != rela[i].r_addend)) { _count_relocs++; @@ -188,10 +205,11 @@ static int relacmp(const void *_x, const void *_y) /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs) + const Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) { - /* One extra reloc so it's always 0-addr terminated */ - unsigned long relocs = 1; + unsigned long relocs = 0; unsigned i; /* Every relocated section... */ @@ -213,23 +231,127 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs += count_relocs((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size - / sizeof(Elf64_Rela)); + / sizeof(Elf64_Rela), + R_PPC_REL24); +#ifdef CONFIG_PPC_KERNEL_PCREL + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + R_PPC64_REL24_NOTOC); +#endif } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* stubs for the function tracer */ + for (i = 1; i < hdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) { + me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long); + me->arch.ool_stub_index = 0; + relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / + sizeof(struct ppc64_stub_entry); + break; + } + } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } +#ifdef CONFIG_PPC_KERNEL_PCREL +static int count_pcpu_relocs(const Elf64_Shdr *sechdrs, + const Elf64_Rela *rela, unsigned int num, + unsigned int symindex, unsigned int pcpu) +{ + unsigned int i, r_info, r_addend, _count_relocs; + + _count_relocs = 0; + r_info = 0; + r_addend = 0; + + for (i = 0; i < num; i++) { + Elf64_Sym *sym; + + /* This is the symbol it is referring to */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info); + + if (sym->st_shndx == pcpu && + (r_info != ELF64_R_SYM(rela[i].r_info) || + r_addend != rela[i].r_addend)) { + _count_relocs++; + r_info = ELF64_R_SYM(rela[i].r_info); + r_addend = rela[i].r_addend; + } + } + + return _count_relocs; +} + +/* Get size of potential GOT required. */ +static unsigned long get_got_size(const Elf64_Ehdr *hdr, + const Elf64_Shdr *sechdrs, + struct module *me) +{ + /* One extra reloc so it's always 0-addr terminated */ + unsigned long relocs = 1; + unsigned int i, symindex = 0; + + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_SYMTAB) { + symindex = i; + break; + } + } + WARN_ON_ONCE(!symindex); + + /* Every relocated section... */ + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_RELA) { + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela)); + + /* + * Sort the relocation information based on a symbol and + * addend key. This is a stable O(n*log n) complexity + * algorithm but it will reduce the complexity of + * count_relocs() to linear complexity O(n) + */ + sort((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela), + sizeof(Elf64_Rela), relacmp, NULL); + + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + R_PPC64_GOT_PCREL34); + + /* + * Percpu data access typically gets linked with + * REL34 relocations, but the percpu section gets + * moved at load time and requires that to be + * converted to GOT linkage. + */ + if (IS_ENABLED(CONFIG_SMP) && symindex) + relocs += count_pcpu_relocs(sechdrs, + (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + symindex, me->arch.pcpu_section); + } + } + + pr_debug("Looks like a total of %lu GOT entries, max\n", relocs); + return relocs * sizeof(struct ppc64_got_entry); +} +#else /* CONFIG_PPC_KERNEL_PCREL */ + /* Still needed for ELFv2, for .TOC. */ static void dedotify_versions(struct modversion_info *vers, unsigned long size) @@ -242,6 +364,24 @@ static void dedotify_versions(struct modversion_info *vers, } } +/* Same as normal versions, remove a leading dot if present. */ +static void dedotify_ext_version_names(char *str_seq, unsigned long size) +{ + unsigned long out = 0; + unsigned long in; + char last = '\0'; + + for (in = 0; in < size; in++) { + /* Skip one leading dot */ + if (last == '\0' && str_seq[in] == '.') + in++; + last = str_seq[in]; + str_seq[out++] = last; + } + /* Zero the trailing portion of the names table for robustness */ + memset(&str_seq[out], 0, size - out); +} + /* * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC. * seem to be defined (value set later). @@ -279,6 +419,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, } return NULL; } +#endif /* CONFIG_PPC_KERNEL_PCREL */ bool module_init_section(const char *name) { @@ -297,20 +438,32 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, for (i = 1; i < hdr->e_shnum; i++) { if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; +#ifdef CONFIG_PPC_KERNEL_PCREL + else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0) + me->arch.pcpu_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) { + me->arch.got_section = i; + if (sechdrs[i].sh_addralign < 8) + sechdrs[i].sh_addralign = 8; + } +#else else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) sechdrs[i].sh_addralign = 8; - } - else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0) + dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf64_Sym), (void *)hdr + sechdrs[sechdrs[i].sh_link].sh_offset); +#endif } if (!me->arch.stubs_section) { @@ -318,26 +471,47 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return -ENOEXEC; } +#ifdef CONFIG_PPC_KERNEL_PCREL + if (!me->arch.got_section) { + pr_err("%s: doesn't contain .mygot.\n", me->name); + return -ENOEXEC; + } + + /* Override the got size */ + sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me); +#else /* If we don't have a .toc, just use .stubs. We need to set r2 to some reasonable value in case the module calls out to other functions via a stub, or if a function pointer escapes the module by some means. */ if (!me->arch.toc_section) me->arch.toc_section = me->arch.stubs_section; +#endif /* Override the stubs size */ - sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me); + return 0; } -#ifdef CONFIG_MPROFILE_KERNEL +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) static u32 stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)), + PPC_RAW_NOP(), /* align the prefix insn */ + /* paddi r12,r12,addr */ + PPC_PREFIX_MLS | __PPC_PRFX_R(0), + PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), +#else PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), PPC_RAW_ADDIS(_R12, _R12, 0), PPC_RAW_ADDI(_R12, _R12, 0), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR(), +#endif }; /* @@ -358,18 +532,37 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, { long reladdr; - memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - - /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = addr - kernel_toc_addr(); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of %ps out of range of kernel_toc.\n", - me->name, (void *)addr); + if ((unsigned long)entry->jump % 8 != 0) { + pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); return 0; } - entry->jump[1] |= PPC_HA(reladdr); - entry->jump[2] |= PPC_LO(reladdr); + BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump)); + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + /* Stub uses address relative to kernel base (from the paca) */ + reladdr = addr - local_paca->kernelbase; + if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { + pr_err("%s: Address of %ps out of range of 34-bit relative address.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[2] |= IMM_H18(reladdr); + entry->jump[3] |= IMM_L(reladdr); + } else { + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + } /* Even though we don't use funcdata in the stub, it's needed elsewhere. */ entry->funcdata = func_desc(addr); @@ -415,7 +608,11 @@ static bool is_mprofile_ftrace_call(const char *name) */ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { +#ifndef CONFIG_PPC_KERNEL_PCREL return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; +#else + return -1; +#endif } /* Patch stub to reference function and correct r2 value. */ @@ -432,38 +629,62 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); + if ((unsigned long)entry->jump % 8 != 0) { + pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); + return 0; + } + + BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump)); for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; } - /* Stub uses address relative to r2. */ - reladdr = (unsigned long)entry - my_r2(sechdrs, me); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address %p of stub out of range of %p.\n", - me->name, (void *)reladdr, (void *)my_r2); - return 0; - } - pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + /* Stub uses address relative to itself! */ + reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata); + BUILD_BUG_ON(reladdr != 32); + if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { + pr_err("%s: Address of %p out of range of 34-bit relative address.\n", + me->name, (void *)reladdr); + return 0; + } + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); - if (patch_instruction(&entry->jump[0], - ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) - return 0; + /* May not even need this if we're relative to 0 */ + if (patch_instruction(&entry->jump[0], + ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr), + entry->jump[1] | IMM_L(reladdr)))) + return 0; - if (patch_instruction(&entry->jump[1], - ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) - return 0; + } else { + /* Stub uses address relative to r2. */ + reladdr = (unsigned long)entry - my_r2(sechdrs, me); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address %p of stub out of range of %p.\n", + me->name, (void *)reladdr, (void *)my_r2); + return 0; + } + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); + + if (patch_instruction(&entry->jump[0], + ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) + return 0; + + if (patch_instruction(&entry->jump[1], + ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) + return 0; + } // func_desc_t is 8 bytes if ABIv2, else 16 bytes desc = func_desc(addr); for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { - if (patch_instruction(((u32 *)&entry->funcdata) + i, - ppc_inst(((u32 *)(&desc))[i]))) + if (patch_u32(((u32 *)&entry->funcdata) + i, ((u32 *)&desc)[i])) return 0; } - if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + if (patch_u32(&entry->magic, STUB_MAGIC)) return 0; return 1; @@ -483,7 +704,7 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, /* Find this stub, or if that fails, the next avail. entry */ stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; - for (i = 0; stub_func_addr(stubs[i].funcdata); i++) { + for (i = 0; i < me->arch.stub_count; i++) { if (WARN_ON(i >= num_stubs)) return 0; @@ -494,17 +715,53 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, if (!create_stub(sechdrs, &stubs[i], addr, me, name)) return 0; + me->arch.stub_count++; return (unsigned long)&stubs[i]; } +#ifdef CONFIG_PPC_KERNEL_PCREL +/* Create GOT to load the location described in this ptr */ +static unsigned long got_for_addr(const Elf64_Shdr *sechdrs, + unsigned long addr, + struct module *me, + const char *name) +{ + struct ppc64_got_entry *got; + unsigned int i, num_got; + + if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return addr; + + num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got); + + /* Find this stub, or if that fails, the next avail. entry */ + got = (void *)sechdrs[me->arch.got_section].sh_addr; + for (i = 0; got[i].addr; i++) { + if (WARN_ON(i >= num_got)) + return 0; + + if (got[i].addr == addr) + return (unsigned long)&got[i]; + } + + got[i].addr = addr; + + return (unsigned long)&got[i]; +} +#endif + /* We expect a noop next: if it is, replace it with instruction to restore r2. */ static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; + u32 insn_val = *instruction; + + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return 0; if (is_mprofile_ftrace_call(name)) - return 1; + return 0; /* * Make sure the branch isn't a sibling call. Sibling calls aren't @@ -512,19 +769,25 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * restore afterwards. */ if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) - return 1; + return 0; - if (*instruction != PPC_RAW_NOP()) { - pr_err("%s: Expected nop after call, got %08x at %pS\n", - me->name, *instruction, instruction); + /* + * For livepatch, the restore r2 instruction might have already been + * written previously, if the referenced symbol is in a previously + * unloaded module which is now being loaded again. In that case, skip + * the warning and the instruction write. + */ + if (insn_val == PPC_INST_LD_TOC) return 0; + + if (insn_val != PPC_RAW_NOP()) { + pr_err("%s: Expected nop after call, got %08x at %pS\n", + me->name, insn_val, instruction); + return -ENOEXEC; } /* ld r2,R2_STACK_OFFSET(r1) */ - if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) - return 0; - - return 1; + return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC)); } int apply_relocate_add(Elf64_Shdr *sechdrs, @@ -542,6 +805,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); +#ifndef CONFIG_PPC_KERNEL_PCREL /* First time we're called, we can fix up .TOC. */ if (!me->arch.toc_fixed) { sym = find_dot_toc(sechdrs, strtab, symindex); @@ -551,7 +815,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym->st_value = my_r2(sechdrs, me); me->arch.toc_fixed = true; } - +#endif for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -579,6 +843,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(unsigned long *)location = value; break; +#ifndef CONFIG_PPC_KERNEL_PCREL case R_PPC64_TOC: *(unsigned long *)location = my_r2(sechdrs, me); break; @@ -638,8 +903,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, = (*((uint16_t *) location) & ~0xffff) | (value & 0xffff); break; +#endif case R_PPC_REL24: +#ifdef CONFIG_PPC_KERNEL_PCREL + /* PCREL still generates REL24 for mcount */ + case R_PPC64_REL24_NOTOC: +#endif /* FIXME: Handle weak symbols here --RR */ if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_LIVEPATCH) { @@ -648,8 +918,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, strtab + sym->st_name); if (!value) return -ENOENT; - if (!restore_r2(strtab + sym->st_name, - (u32 *)location + 1, me)) + if (restore_r2(strtab + sym->st_name, + (u32 *)location + 1, me)) return -ENOEXEC; } else value += local_entry_offset(sym); @@ -687,6 +957,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(u32 *)location = value; break; +#ifdef CONFIG_PPC_KERNEL_PCREL + case R_PPC64_PCREL34: { + unsigned long absvalue = value; + + /* Convert value to relative */ + value -= (unsigned long)location; + + if (value + 0x200000000 > 0x3ffffffff) { + if (sym->st_shndx != me->arch.pcpu_section) { + pr_err("%s: REL34 %li out of range!\n", + me->name, (long)value); + return -ENOEXEC; + } + + /* + * per-cpu section is special cased because + * it is moved during loading, so has to be + * converted to use GOT. + */ + value = got_for_addr(sechdrs, absvalue, me, + strtab + sym->st_name); + if (!value) + return -ENOENT; + value -= (unsigned long)location; + + /* Turn pla into pld */ + if (patch_instruction((u32 *)location, + ppc_inst_prefix((*(u32 *)location & ~0x02000000), + (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000))) + return -EFAULT; + } + + if (patch_instruction((u32 *)location, + ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value), + (*((u32 *)location + 1) & ~0xffff) | IMM_L(value)))) + return -EFAULT; + + break; + } + +#else case R_PPC64_TOCSAVE: /* * Marker reloc indicates we don't have to save r2. @@ -694,8 +1005,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * it. */ break; +#endif case R_PPC64_ENTRY: + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + break; + /* * Optimize ELFv2 large code model entry point if * the TOC is within 2GB range of current location. @@ -738,6 +1053,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | (value & 0xffff); break; +#ifdef CONFIG_PPC_KERNEL_PCREL + case R_PPC64_GOT_PCREL34: + value = got_for_addr(sechdrs, value, me, + strtab + sym->st_name); + if (!value) + return -ENOENT; + value -= (unsigned long)location; + ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) | + ((value >> 16) & 0x3ffff); + ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) | + (value & 0xffff); + break; +#endif + default: pr_err("%s: Unknown ADD relocation: %lu\n", me->name, @@ -786,6 +1115,27 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return 0; } +static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + unsigned int total_stubs, num_stubs; + struct ppc64_stub_entry *stub; + + total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub); + num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry); + + if (WARN_ON(me->arch.stub_count + num_stubs > total_stubs)) + return -1; + + stub = (void *)sechdrs[me->arch.stubs_section].sh_addr; + me->arch.ool_stubs = (struct ftrace_ool_stub *)(stub + me->arch.stub_count); + me->arch.stub_count += num_stubs; +#endif + + return 0; +} + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.tramp = stub_for_addr(sechdrs, @@ -804,6 +1154,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) if (!mod->arch.tramp) return -ENOENT; + if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod)) + return -ENOENT; + return 0; } #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index e385d3164648..f9c6568a9137 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -73,7 +73,7 @@ static const char *nvram_os_partitions[] = { }; static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason); + struct kmsg_dump_detail *detail); static struct kmsg_dumper nvram_kmsg_dumper = { .dump = oops_to_nvram @@ -643,7 +643,7 @@ void __init nvram_init_oops_partition(int rtas_partition_exists) * partition. If that's too much, go back and capture uncompressed text. */ static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; @@ -655,7 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; int rc = -1; - switch (reason) { + switch (detail->reason) { case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. */ return; @@ -671,7 +671,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, break; default: pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", - __func__, (int) reason); + __func__, (int) detail->reason); return; } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c deleted file mode 100644 index f89376ff633e..000000000000 --- a/arch/powerpc/kernel/of_platform.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * and Arnd Bergmann, IBM Corp. - */ - -#undef DEBUG - -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/mod_devicetable.h> -#include <linux/pci.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/of_platform.h> -#include <linux/atomic.h> - -#include <asm/errno.h> -#include <asm/topology.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> -#include <asm/eeh.h> - -#ifdef CONFIG_PPC_OF_PLATFORM_PCI - -/* The probing of PCI controllers from of_platform is currently - * 64 bits only, mostly due to gratuitous differences between - * the 32 and 64 bits PCI code on PowerPC and the 32 bits one - * lacking some bits needed here. - */ - -static int of_pci_phb_probe(struct platform_device *dev) -{ - struct pci_controller *phb; - - /* Check if we can do that ... */ - if (ppc_md.pci_setup_phb == NULL) - return -ENODEV; - - pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node); - - /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(dev->dev.of_node); - if (!phb) - return -ENODEV; - - /* Setup parent in sysfs */ - phb->parent = &dev->dev; - - /* Setup the PHB using arch provided callback */ - if (ppc_md.pci_setup_phb(phb)) { - pcibios_free_controller(phb); - return -ENODEV; - } - - /* Process "ranges" property */ - pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0); - - /* Init pci_dn data structures */ - pci_devs_phb_init_dynamic(phb); - - /* Create EEH PE for the PHB */ - eeh_phb_pe_create(phb); - - /* Scan the bus */ - pcibios_scan_phb(phb); - if (phb->bus == NULL) - return -ENXIO; - - /* Claim resources. This might need some rework as well depending - * whether we are doing probe-only or not, like assigning unassigned - * resources etc... - */ - pcibios_claim_one_bus(phb->bus); - - /* Add probed PCI devices to the device model */ - pci_bus_add_devices(phb->bus); - - return 0; -} - -static const struct of_device_id of_pci_phb_ids[] = { - { .type = "pci", }, - { .type = "pcix", }, - { .type = "pcie", }, - { .type = "pciex", }, - { .type = "ht", }, - {} -}; - -static struct platform_driver of_pci_phb_driver = { - .probe = of_pci_phb_probe, - .driver = { - .name = "of-pci", - .of_match_table = of_pci_phb_ids, - }, -}; - -builtin_platform_driver(of_pci_phb_driver); - -#endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 004fae2044a3..2e83702bf9ba 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -13,7 +13,7 @@ #include <asm/kprobes.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/inst.h> @@ -56,7 +56,7 @@ static unsigned long can_optimize(struct kprobe *p) * has a 'nop' instruction, which can be emulated. * So further checks can be skipped. */ - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return addr + sizeof(kprobe_opcode_t); /* diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index be8db402e963..7502066c3c53 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -68,7 +68,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit, memblock_set_bottom_up(true); /* - * See Documentation/powerpc/ultravisor.rst for more details. + * See Documentation/arch/powerpc/ultravisor.rst for more details. * * UV/HV data sharing is in PAGE_SIZE granularity. In order to * minimize the number of pages shared, align the allocation to @@ -191,7 +191,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; +#ifndef CONFIG_PPC_KERNEL_PCREL new_paca->kernel_toc = kernel_toc_addr(); +#endif new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d67cf79bf5d0..eac84d687b53 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -125,7 +125,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; - phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); + phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return NULL; @@ -517,12 +517,11 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) } /* - * This one is used by /dev/mem and fbdev who have no clue about the + * This one is used by /dev/mem and video who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot) { @@ -880,6 +879,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) static void pcibios_fixup_resources(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct resource *res; int i; if (!hose) { @@ -891,9 +891,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev) if (dev->is_virtfn) return; - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - struct resource *res = dev->resource + i; + pci_dev_for_each_resource(dev, res, i) { struct pci_bus_region reg; + if (!res->flags) continue; @@ -1452,11 +1452,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus) struct pci_bus *child_bus; list_for_each_entry(dev, &bus->devices, bus_list) { + struct resource *r; int i; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - + pci_dev_for_each_resource(dev, r, i) { if (r->parent || !r->start || !r->flags) continue; @@ -1705,19 +1704,20 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb); static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { - int i, class = dev->class >> 8; + int class = dev->class >> 8; /* When configured as agent, programming interface = 1 */ int prog_if = dev->class & 0xf; + struct resource *r; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && (prog_if == 0) && (dev->bus->parent == NULL)) { - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; + pci_dev_for_each_resource(dev, r) { + r->start = 0; + r->end = 0; + r->flags = 0; } } } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 0fe251c6ac2c..6f444d0822d8 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -93,6 +93,36 @@ void pci_hp_remove_devices(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pci_hp_remove_devices); +static void traverse_siblings_and_scan_slot(struct device_node *start, struct pci_bus *bus) +{ + struct device_node *dn; + int slotno; + + u32 class = 0; + + if (!of_property_read_u32(start->child, "class-code", &class)) { + /* Call of pci_scan_slot for non-bridge/EP case */ + if (!((class >> 8) == PCI_CLASS_BRIDGE_PCI)) { + slotno = PCI_SLOT(PCI_DN(start->child)->devfn); + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + return; + } + } + + /* Iterate all siblings */ + for_each_child_of_node(start, dn) { + class = 0; + + if (!of_property_read_u32(start->child, "class-code", &class)) { + /* Call of pci_scan_slot on each sibling-nodes/bridge-ports */ + if ((class >> 8) == PCI_CLASS_BRIDGE_PCI) { + slotno = PCI_SLOT(PCI_DN(dn)->devfn); + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + } + } + } +} + /** * pci_hp_add_devices - adds new pci devices to bus * @bus: the indicated PCI bus @@ -106,11 +136,14 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices); */ void pci_hp_add_devices(struct pci_bus *bus) { - int slotno, mode, max; + int mode, max; struct pci_dev *dev; struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); + if (!dn) + return; + phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; @@ -129,8 +162,7 @@ void pci_hp_add_devices(struct pci_bus *bus) * order for fully rescan all the way down to pick them up. * They can have been removed during partial hotplug. */ - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + traverse_siblings_and_scan_slot(dn, bus); max = bus->busn_res.start; /* * Scan bridges that are already configured. We don't touch diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 855b59892c5c..f8a3bd8cfae4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -62,7 +62,7 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP static u8* pci_to_OF_bus_map; static int pci_bus_count; @@ -152,6 +152,7 @@ pcibios_make_OF_bus_map(void) } #endif } +#endif // CONFIG_PPC_PCI_OF_BUS_MAP #ifdef CONFIG_PPC_PMAC @@ -160,7 +161,9 @@ pcibios_make_OF_bus_map(void) */ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) { +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP struct pci_dev *dev = NULL; +#endif const __be32 *reg; int size; @@ -175,6 +178,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) *bus = (be32_to_cpup(®[0]) >> 16) & 0xff; *devfn = (be32_to_cpup(®[0]) >> 8) & 0xff; +#ifndef CONFIG_PPC_PCI_OF_BUS_MAP + return 0; +#else /* Ok, here we need some tweak. If we have already renumbered * all busses, we can't rely on the OF bus number any more. * the pci_to_OF_bus_map is not enough as several PCI busses @@ -192,11 +198,12 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) } return -ENODEV; +#endif // CONFIG_PPC_PCI_OF_BUS_MAP } EXPORT_SYMBOL(pci_device_from_OF_node); #endif -#ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -206,11 +213,8 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_alloc(sizeof(struct property) + 256, + of_prop = memblock_alloc_or_panic(sizeof(struct property) + 256, SMP_CACHE_BYTES); - if (!of_prop) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct property) + 256); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); @@ -221,9 +225,7 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } -#endif - -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) */ +#endif // CONFIG_PPC_PCI_OF_BUS_MAP void pcibios_setup_phb_io_space(struct pci_controller *hose) { @@ -273,6 +275,7 @@ static int __init pcibios_init(void) } #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -282,6 +285,7 @@ static int __init pcibios_init(void) if (pci_assign_all_buses) pcibios_make_OF_bus_map(); #endif +#endif /* Call common code to handle resource allocation */ pcibios_resource_survey(); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 0c7cfb9fab04..e27342ef128b 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -73,7 +73,7 @@ static int __init pcibios_init(void) return 0; } -subsys_initcall(pcibios_init); +subsys_initcall_sync(pcibios_init); int pcibios_unmap_io_space(struct pci_bus *bus) { @@ -132,7 +132,7 @@ void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size) * address decoding but I'd rather not deal with those outside of the * reserved 64K legacy region. */ - area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END, + area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END, __builtin_return_address(0)); if (!area) return NULL; diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 15414c8a2837..9fabb4d9235e 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -74,7 +74,7 @@ void release_pmc_hardware(void) } EXPORT_SYMBOL_GPL(release_pmc_hardware); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 void power4_enable_pmcs(void) { unsigned long hid0; diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 49813f982468..a9b9c32d0c1f 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -31,10 +31,10 @@ _GLOBAL(ppc_save_regs) lbz r0,PACAIRQSOFTMASK(r13) PPC_STL r0,SOFTE(r3) #endif - /* go up one stack frame for SP */ - PPC_LL r4,0(r1) - PPC_STL r4,GPR1(r3) + /* store current SP */ + PPC_STL r1,GPR1(r3) /* get caller's LR */ + PPC_LL r4,0(r1) PPC_LL r0,LRSAVE(r4) PPC_STL r0,_LINK(r3) mflr r0 diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index b109cd7b5d01..d083b4517065 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -4,17 +4,20 @@ */ #include <linux/init.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/of.h> +#include <linux/string.h> #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> +#include <asm/systemcfg.h> #include <linux/uaccess.h> -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG static loff_t page_map_seek(struct file *file, loff_t off, int whence) { @@ -33,10 +36,9 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) return -EINVAL; - remap_pfn_range(vma, vma->vm_start, - __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot); - return 0; + return remap_pfn_range(vma, vma->vm_start, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); } static const struct proc_ops page_map_proc_ops = { @@ -45,13 +47,35 @@ static const struct proc_ops page_map_proc_ops = { .proc_mmap = page_map_mmap, }; +static union { + struct systemcfg data; + u8 page[PAGE_SIZE]; +} systemcfg_data_store __page_aligned_data; +struct systemcfg *systemcfg = &systemcfg_data_store.data; static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; + strscpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + /* + * Fake the old platform number for pSeries and add + * in LPAR bit if necessary + */ + systemcfg->platform = 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + systemcfg->platform |= 1; + systemcfg->physicalMemorySize = memblock_phys_mem_size(); + systemcfg->dcache_size = ppc64_caches.l1d.size; + systemcfg->dcache_line_size = ppc64_caches.l1d.line_size; + systemcfg->icache_size = ppc64_caches.l1i.size; + systemcfg->icache_line_size = ppc64_caches.l1i.line_size; + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_proc_ops, vdso_data); + &page_map_proc_ops, systemcfg); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); @@ -60,7 +84,7 @@ static int __init proc_ppc64_init(void) } __initcall(proc_ppc64_init); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */ /* * Create the ppc64 and ppc64/rtas directories early. This allows us to diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c22cc234672f..a45fe147868b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -54,7 +54,7 @@ #include <asm/firmware.h> #include <asm/hw_irq.h> #endif -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> @@ -72,8 +72,6 @@ #define TM_DEBUG(x...) do { } while(0) #endif -extern unsigned long _get_SP(void); - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Are we running in "Suspend disabled" mode? If so we have to block any @@ -1002,7 +1000,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) WARN_ON(tm_suspend_disabled); - TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " + TM_DEBUG("---- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", tsk->pid, thr->regs->nip, thr->regs->ccr, thr->regs->msr, @@ -1010,7 +1008,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) tm_reclaim_thread(thr, TM_CAUSE_RESCHED); - TM_DEBUG("--- tm_reclaim on pid %d complete\n", + TM_DEBUG("---- tm_reclaim on pid %d complete\n", tsk->pid); out_and_saveregs: @@ -1182,6 +1180,12 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } + + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) + t->hashkeyr = mfspr(SPRN_HASHKEYR); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + t->dexcr = mfspr(SPRN_DEXCR); #endif } @@ -1195,11 +1199,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { @@ -1260,6 +1264,14 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (cpu_has_feature(CPU_FTR_P9_TIDR) && old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); + + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && + old_thread->hashkeyr != new_thread->hashkeyr) + mtspr(SPRN_HASHKEYR, new_thread->hashkeyr); + + if (cpu_has_feature(CPU_FTR_ARCH_31) && + old_thread->dexcr != new_thread->dexcr) + mtspr(SPRN_DEXCR, new_thread->dexcr); #endif } @@ -1405,8 +1417,7 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!__kernel_text_address(pc) || - get_kernel_nofault(instr, (const void *)pc)) { + if (get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (nip == pc) @@ -1560,7 +1571,7 @@ static void __show_regs(struct pt_regs *regs) if (trap == INTERRUPT_MACHINE_CHECK || trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { - if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) + if (IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); @@ -1628,10 +1639,17 @@ void arch_setup_new_exec(void) current->thread.regs->amr = default_amr; current->thread.regs->iamr = default_iamr; #endif + +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + current->thread.dexcr = current->thread.dexcr_onexec; + mtspr(SPRN_DEXCR, current->thread.dexcr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } #ifdef CONFIG_PPC64 -/** +/* * Assign a TIDR (thread ID) for task @t and set it in the thread * structure. For now, we only support setting TIDR for 'current' task. * @@ -1641,7 +1659,7 @@ void arch_setup_new_exec(void) * cases will happen: * * 1. The correct thread is running, the wrong thread is not - * In this situation, the correct thread is woken and proceeds to pass it's + * In this situation, the correct thread is woken and proceeds to pass its * condition check. * * 2. Neither threads are running @@ -1651,15 +1669,15 @@ void arch_setup_new_exec(void) * for the wrong thread, or they will execute the condition check immediately. * * 3. The wrong thread is running, the correct thread is not - * The wrong thread will be woken, but will fail it's condition check and + * The wrong thread will be woken, but will fail its condition check and * re-execute wait. The correct thread, when scheduled, will execute either - * it's condition check (which will pass), or wait, which returns immediately - * when called the first time after the thread is scheduled, followed by it's + * its condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by its * condition check (which will pass). * * 4. Both threads are running - * Both threads will be woken. The wrong thread will fail it's condition check - * and execute another wait, while the correct thread will pass it's condition + * Both threads will be woken. The wrong thread will fail its condition check + * and execute another wait, while the correct thread will pass its condition * check. * * @t: the task to set the thread ID for @@ -1739,68 +1757,83 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) */ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { - unsigned long clone_flags = args->flags; - unsigned long usp = args->stack; - unsigned long tls = args->tls; - struct pt_regs *childregs, *kregs; + struct pt_regs *kregs; /* Switch frame regs */ extern void ret_from_fork(void); extern void ret_from_fork_scv(void); - extern void ret_from_kernel_thread(void); + extern void ret_from_kernel_user_thread(void); + extern void start_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; - struct thread_info *ti = task_thread_info(p); #ifdef CONFIG_HAVE_HW_BREAKPOINT int i; #endif klp_init_thread_info(p); - /* Create initial stack frame. */ - sp -= STACK_USER_INT_FRAME_SIZE; - *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; - - /* Copy registers */ - childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); - if (unlikely(args->fn)) { + if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ + + /* Create initial minimum stack frame. */ + sp -= STACK_FRAME_MIN_SIZE; ((unsigned long *)sp)[0] = 0; - memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; - /* function */ - if (args->fn) - childregs->gpr[14] = ppc_function_entry((void *)args->fn); -#ifdef CONFIG_PPC64 - clear_tsk_thread_flag(p, TIF_32BIT); - childregs->softe = IRQS_ENABLED; -#endif - childregs->gpr[15] = (unsigned long)args->fn_arg; + + f = start_kernel_thread; p->thread.regs = NULL; /* no user register state */ - ti->flags |= _TIF_RESTOREALL; - f = ret_from_kernel_thread; + clear_tsk_compat_task(p); } else { /* user thread */ - struct pt_regs *regs = current_pt_regs(); - *childregs = *regs; - if (usp) - childregs->gpr[1] = usp; - ((unsigned long *)sp)[0] = childregs->gpr[1]; - p->thread.regs = childregs; - /* 64s sets this in ret_from_fork */ - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) - childregs->gpr[3] = 0; /* Result from fork() */ - if (clone_flags & CLONE_SETTLS) { - if (!is_32bit_task()) - childregs->gpr[13] = tls; + struct pt_regs *childregs; + + /* Create initial user return stack frame. */ + sp -= STACK_USER_INT_FRAME_SIZE; + *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; + + childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); + + if (unlikely(args->fn)) { + /* + * A user space thread, but it first runs a kernel + * thread, and then returns as though it had called + * execve rather than fork, so user regs will be + * filled in (e.g., by kernel_execve()). + */ + ((unsigned long *)sp)[0] = 0; + memset(childregs, 0, sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 + childregs->softe = IRQS_ENABLED; +#endif + f = ret_from_kernel_user_thread; + } else { + struct pt_regs *regs = current_pt_regs(); + u64 clone_flags = args->flags; + unsigned long usp = args->stack; + + /* Copy registers */ + *childregs = *regs; + if (usp) + childregs->gpr[1] = usp; + ((unsigned long *)sp)[0] = childregs->gpr[1]; +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON_ONCE(childregs->softe != IRQS_ENABLED); +#endif + if (clone_flags & CLONE_SETTLS) { + unsigned long tls = args->tls; + + if (!is_32bit_task()) + childregs->gpr[13] = tls; + else + childregs->gpr[2] = tls; + } + + if (trap_is_scv(regs)) + f = ret_from_fork_scv; else - childregs->gpr[2] = tls; + f = ret_from_fork; } - if (trap_is_scv(regs)) - f = ret_from_fork_scv; - else - f = ret_from_fork; + childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); + p->thread.regs = childregs; } - childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); /* * The way this works is that at some point in the future @@ -1814,6 +1847,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) sp -= STACK_SWITCH_FRAME_SIZE; ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE; kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); + kregs->nip = ppc_function_entry(f); + if (unlikely(args->fn)) { + /* + * Put kthread fn, arg parameters in non-volatile GPRs in the + * switch frame so they are loaded by _switch before it returns + * to ret_from_kernel_thread. + */ + kregs->gpr[14] = ppc_function_entry((void *)args->fn); + kregs->gpr[15] = (unsigned long)args->fn_arg; + } p->thread.ksp = sp; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -1830,7 +1873,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) p->thread.kuap = KUAP_NONE; #endif -#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) +#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP) p->thread.pid = MMU_NO_CONTEXT; #endif @@ -1841,27 +1884,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.dscr_inherit = current->thread.dscr_inherit; p->thread.dscr = mfspr(SPRN_DSCR); } - if (cpu_has_feature(CPU_FTR_HAS_PPR)) - childregs->ppr = DEFAULT_PPR; p->thread.tidr = 0; #endif - /* - * Run with the current AMR value of the kernel - */ -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - kregs->amr = AMR_KUAP_BLOCKED; +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) + p->thread.hashkeyr = current->thread.hashkeyr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) - kregs->iamr = AMR_KUEP_BLOCKED; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + p->thread.dexcr = mfspr(SPRN_DEXCR); #endif - kregs->nip = ppc_function_entry(f); return 0; } -void preload_new_slb_context(unsigned long start, unsigned long sp); - /* * Set up a thread for executing a new program */ @@ -1869,9 +1904,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) { #ifdef CONFIG_PPC64 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ - - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled()) - preload_new_slb_context(start, sp); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1923,8 +1955,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * address of _start and the second entry is the TOC * value we need to use. */ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + get_user(entry, (unsigned long __user *)start); + get_user(toc, (unsigned long __user *)start+1); /* Check whether the e_entry function descriptor entries * need to be relocated before we can use them. @@ -1973,6 +2005,12 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + current->thread.hashkeyr = get_random_long(); + mtspr(SPRN_HASHKEYR, current->thread.hashkeyr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } EXPORT_SYMBOL(start_thread); @@ -2118,6 +2156,9 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!hardirq_ctx[cpu] || !softirq_ctx[cpu]) + return 0; + stack_page = (unsigned long)hardirq_ctx[cpu]; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2129,16 +2170,24 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } +#ifdef CONFIG_PPC64 static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long nbytes) { -#ifdef CONFIG_PPC64 unsigned long stack_page; unsigned long cpu = task_cpu(p); if (!paca_ptrs) return 0; + if (!paca_ptrs[cpu]->emergency_sp) + return 0; + +# ifdef CONFIG_PPC_BOOK3S_64 + if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp) + return 0; +#endif + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2152,10 +2201,26 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; # endif -#endif return 0; } +#else +static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return 0; + + stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + return 0; +} +#endif /* * validate the stack frame of a particular minimum size, used for when we are @@ -2219,6 +2284,22 @@ unsigned long __get_wchan(struct task_struct *p) return ret; } +static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk) +{ + unsigned long stack_page; + + // A non-empty pt_regs should never have a zero MSR or TRAP value. + if (regs->msr || regs->trap) + return false; + + // Check it sits at the very base of the stack + stack_page = (unsigned long)task_stack_page(tsk); + if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE) + return false; + + return true; +} + static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; void __no_sanitize_address show_stack(struct task_struct *tsk, @@ -2281,11 +2362,15 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, (sp + STACK_INT_FRAME_REGS); lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n", + printk("%s---- interrupt: %lx at %pS\n", loglvl, regs->trap, (void *)regs->nip); - __show_regs(regs); - printk("%s--- interrupt: %lx\n", - loglvl, regs->trap); + + // Detect the case of an empty pt_regs at the very base + // of the stack and suppress showing it in full. + if (!empty_user_regs(regs, tsk)) { + __show_regs(regs); + printk("%s---- interrupt: %lx\n", loglvl, regs->trap); + } firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4f1c920aa13e..9ed9dde7d231 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,7 @@ #include <asm/drmem.h> #include <asm/ultravisor.h> #include <asm/prom.h> +#include <asm/plpks.h> #include <mm/mmu_decl.h> @@ -150,6 +151,9 @@ static void __init move_device_tree(void) * pa-features property is missing, or a 1/0 to indicate if the feature * is supported/not supported. Note that the bit numbers are * big-endian to match the definition in PAPR. + * Note: the 'clear' flag clears the feature if the bit is set in the + * ibm,pa/pi-features property, it does not set the feature if the + * bit is clear. */ struct ibm_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ @@ -158,7 +162,7 @@ struct ibm_feature { unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa/pi-features */ unsigned char pabit; /* bit number (big-endian) */ - unsigned char invert; /* if 1, pa bit set => clear feature */ + unsigned char clear; /* if 1, pa bit set => clear feature */ }; static struct ibm_feature ibm_pa_features[] __initdata = { @@ -181,6 +185,7 @@ static struct ibm_feature ibm_pa_features[] __initdata = { .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, + { .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE }, }; /* @@ -191,6 +196,7 @@ static struct ibm_feature ibm_pa_features[] __initdata = { */ static struct ibm_feature ibm_pi_features[] __initdata = { { .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI }, + { .pabyte = 0, .pabit = 4, .cpu_features = CPU_FTR_DBELL, .clear = 1 }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -218,12 +224,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, if (fp->pabyte >= ftrs[0]) continue; bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1; - if (bit ^ fp->invert) { + if (bit && !fp->clear) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; - } else { + } else if (bit == fp->clear) { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; @@ -325,6 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, void *data) { const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *cpu_version = NULL; const __be32 *prop; const __be32 *intserv; int i, nthreads; @@ -366,12 +373,30 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (found < 0) return 0; - DBG("boot cpu: logical %d physical %d\n", found, - be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - // Pass the boot CPU's hard CPU id back to our caller - *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); + + if (nr_cpu_ids % nthreads != 0) { + set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); + pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n", + nr_cpu_ids); + } + + if (boot_cpuid >= nr_cpu_ids) { + // Remember boot core for smp_setup_cpu_maps() + boot_core_hwid = be32_to_cpu(intserv[0]); + + pr_warn("Boot CPU %d (core hwid %d) >= nr_cpu_ids, adjusted boot CPU to %d\n", + boot_cpuid, boot_core_hwid, found_thread); + + // Adjust boot CPU to appear on logical core 0 + boot_cpuid = found_thread; + } + + DBG("boot cpu: logical %d physical %d\n", boot_cpuid, + be32_to_cpu(intserv[found_thread])); /* * PAPR defines "logical" PVR values for cpus that @@ -396,7 +421,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, prop = of_get_flat_dt_prop(node, "cpu-version", NULL); if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) { identify_cpu(0, be32_to_cpup(prop)); - seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop)); + cpu_version = prop; } check_cpu_feature_properties(node); @@ -407,6 +432,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } identical_pvr_fixup(node); + + // We can now add the CPU name & PVR to the hardware description + seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); + if (cpu_version) + seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version)); + init_mmu_slb_size(node); #ifdef CONFIG_PPC64 @@ -451,7 +482,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_RESERVE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; @@ -755,13 +786,12 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - u32 boot_cpu_hwid; - phys_addr_t limit; + phys_addr_t int_vector_size; DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ - if (!early_init_dt_verify(params)) + if (!early_init_dt_verify(params, __pa(params))) panic("BUG: Failed verifying flat device tree, bad version?"); of_scan_flat_dt(early_init_dt_scan_model, NULL); @@ -790,6 +820,9 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + /* Append additional parameters passed for fadump capture kernel */ + fadump_append_bootargs(); + /* Scan memory nodes and rebuild MEMBLOCKs */ early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); @@ -809,9 +842,16 @@ void __init early_init_devtree(void *params) setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); +#ifdef CONFIG_PPC64 + /* If relocatable, reserve at least 32k for interrupt vectors etc. */ + int_vector_size = __end_interrupts - _stext; + int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size); +#else /* If relocatable, reserve first 32k for interrupt vectors etc. */ + int_vector_size = SZ_32K; +#endif if (PHYSICAL_START > MEMORY_START) - memblock_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, int_vector_size); reserve_kdump_trampoline(); #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* @@ -820,12 +860,15 @@ void __init early_init_devtree(void *params) */ if (fadump_reserve_mem() == 0) #endif - reserve_crashkernel(); + arch_reserve_crashkernel(); early_reserve_mem(); - /* Ensure that total memory size is page-aligned. */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); - memblock_enforce_memory_limit(limit); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) if (!early_radix_enabled()) @@ -845,13 +888,10 @@ void __init early_init_devtree(void *params) dt_cpu_ftrs_scan(); - // We can now add the CPU name & PVR to the hardware description - seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); - /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ - of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -868,10 +908,8 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); - // NB. paca is not installed until later in early_setup() - allocate_paca_ptrs(); - allocate_paca(boot_cpuid); - set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + /* Setup param area for passing additional parameters to fadump capture kernel. */ + fadump_setup_param_area(); #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ @@ -893,6 +931,9 @@ void __init early_init_devtree(void *params) powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; #endif + /* If kexec left a PLPKS password in the DT, get it and clear it */ + plpks_early_init_devtree(); + tm_init(); DBG(" <- early_init_devtree()\n"); diff --git a/arch/powerpc/kernel/prom_entry_64.S b/arch/powerpc/kernel/prom_entry_64.S new file mode 100644 index 000000000000..f1b8793d28c6 --- /dev/null +++ b/arch/powerpc/kernel/prom_entry_64.S @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the 64-bit prom entry code. + */ +#include <asm/asm-offsets.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif +#include <asm/ppc_asm.h> + +.section ".text","ax",@progbits + +_GLOBAL(enter_prom) + mflr r0 + std r0,16(r1) + stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */ + + /* Because PROM is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * PROM might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) + SAVE_GPR(13, r1) + SAVE_NVGPRS(r1) + mfcr r10 + mfmsr r11 + std r10,_CCR(r1) + std r11,_MSR(r1) + + /* Put PROM address in SRR0 */ + mtsrr0 r4 + + /* Setup our trampoline return addr in LR */ + bcl 20,31,$+4 +0: mflr r4 + addi r4,r4,(1f - 0b) + mtlr r4 + + /* Prepare a 32-bit mode big endian MSR + */ +#ifdef CONFIG_PPC_BOOK3E_64 + rlwinm r11,r11,0,1,31 + mtsrr1 r11 + rfi +#else /* CONFIG_PPC_BOOK3E_64 */ + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) + andc r11,r11,r12 + mtsrr1 r11 + RFI_TO_KERNEL +#endif /* CONFIG_PPC_BOOK3E_64 */ + +1: /* Return from OF */ + FIXUP_ENDIAN + + /* Just make sure that r1 top 32 bits didn't get + * corrupt by OF + */ + rldicl r1,r1,0,32 + + /* Restore the MSR (back to 64 bits) */ + ld r0,_MSR(r1) + MTMSRD(r0) + isync + + /* Restore other registers */ + REST_GPR(2, r1) + REST_GPR(13, r1) + REST_NVGPRS(r1) + ld r4,_CCR(r1) + mtcr r4 + + addi r1,r1,SWITCH_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index d464ba412084..827c958677f8 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -817,8 +817,8 @@ static void __init early_cmdline_parse(void) opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 - /* Align to 16 MB == size of ppc64 large page */ - prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); + /* Align down to 16 MB which is large page size with hash page translation */ + prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M); #endif } @@ -947,7 +947,7 @@ struct option_vector7 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[14]; + struct { __be32 mask, val; } pvrs[16]; u8 num_vectors; @@ -1008,6 +1008,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .val = cpu_to_be32(0x00800000), }, { + .mask = cpu_to_be32(0xffff0000), /* POWER11 */ + .val = cpu_to_be32(0x00820000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* P11 compliant */ + .val = cpu_to_be32(0x0f000007), + }, + { .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */ .val = cpu_to_be32(0x0f000006), }, @@ -1053,7 +1061,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .virt_base = cpu_to_be32(0xffffffff), .virt_size = cpu_to_be32(0xffffffff), .load_base = cpu_to_be32(0xffffffff), - .min_rma = cpu_to_be32(512), /* 512MB min RMA */ + .min_rma = cpu_to_be32(MIN_RMA), .min_load = cpu_to_be32(0xffffffff), /* full client load */ .min_rma_percent = 0, /* min RMA percentage of total RAM */ .max_pft_size = 48, /* max log_2(hash table size) */ @@ -2784,91 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_MAPLE -/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. - * The values are bad, and it doesn't even have the right number of cells. */ -static void __init fixup_device_tree_maple(void) -{ - phandle isa; - u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ - u32 isa_ranges[6]; - char *name; - - name = "/ht@0/isa@4"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(isa)) { - name = "/ht@0/isa@6"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - rloc = 0x01003000; /* IO space; PCI device = 6 */ - } - if (!PHANDLE_VALID(isa)) - return; - - if (prom_getproplen(isa, "ranges") != 12) - return; - if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) - == PROM_ERROR) - return; - - if (isa_ranges[0] != 0x1 || - isa_ranges[1] != 0xf4000000 || - isa_ranges[2] != 0x00010000) - return; - - prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); - - isa_ranges[0] = 0x1; - isa_ranges[1] = 0x0; - isa_ranges[2] = rloc; - isa_ranges[3] = 0x0; - isa_ranges[4] = 0x0; - isa_ranges[5] = 0x00010000; - prom_setprop(isa, name, "ranges", - isa_ranges, sizeof(isa_ranges)); -} - -#define CPC925_MC_START 0xf8000000 -#define CPC925_MC_LENGTH 0x1000000 -/* The values for memory-controller don't have right number of cells */ -static void __init fixup_device_tree_maple_memory_controller(void) -{ - phandle mc; - u32 mc_reg[4]; - char *name = "/hostbridge@f8000000"; - u32 ac, sc; - - mc = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(mc)) - return; - - if (prom_getproplen(mc, "reg") != 8) - return; - - prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); - if ((ac != 2) || (sc != 2)) - return; - - if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) - return; - - if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) - return; - - prom_printf("Fixing up bogus hostbridge on Maple...\n"); - - mc_reg[0] = 0x0; - mc_reg[1] = CPC925_MC_START; - mc_reg[2] = 0x0; - mc_reg[3] = CPC925_MC_LENGTH; - prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); -} -#else -#define fixup_device_tree_maple() -#define fixup_device_tree_maple_memory_controller() -#endif - -#ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node * Pegasos needs decimal IRQ 14/15, not hexadecimal @@ -2919,12 +2842,8 @@ static void __init fixup_device_tree_chrp(void) } } } -#else -#define fixup_device_tree_chrp() -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) -static void __init fixup_device_tree_pmac(void) +static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; u32 u3_rev; @@ -2963,11 +2882,27 @@ static void __init fixup_device_tree_pmac(void) prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", &parent, sizeof(parent)); } -#else -#define fixup_device_tree_pmac() -#endif -#ifdef CONFIG_PPC_EFIKA +static void __init fixup_device_tree_pmac(void) +{ + __be32 val = 1; + char type[8]; + phandle node; + + // Some pmacs are missing #size-cells on escc or i2s nodes + for (node = 0; prom_next_node(&node); ) { + type[0] = '\0'; + prom_getprop(node, "device_type", type, sizeof(type)); + if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s")) + continue; + + if (prom_getproplen(node, "#size-cells") != PROM_ERROR) + continue; + + prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); + } +} + /* * The MPC5200 FEC driver requires an phy-handle property to tell it how * to talk to the phy. If the phy-handle property is missing, then this @@ -3099,11 +3034,7 @@ static void __init fixup_device_tree_efika(void) /* Make sure ethernet phy-handle property exists */ fixup_device_tree_efika_add_phy(); } -#else -#define fixup_device_tree_efika() -#endif -#ifdef CONFIG_PPC_PASEMI_NEMO /* * CFE supplied on Nemo is broken in several ways, biggest * problem is that it reassigns ISA interrupts to unused mpic ints. @@ -3179,18 +3110,23 @@ static void __init fixup_device_tree_pasemi(void) prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); } -#else /* !CONFIG_PPC_PASEMI_NEMO */ -static inline void fixup_device_tree_pasemi(void) { } -#endif static void __init fixup_device_tree(void) { - fixup_device_tree_maple(); - fixup_device_tree_maple_memory_controller(); - fixup_device_tree_chrp(); - fixup_device_tree_pmac(); - fixup_device_tree_efika(); - fixup_device_tree_pasemi(); + if (IS_ENABLED(CONFIG_PPC_CHRP)) + fixup_device_tree_chrp(); + + if (IS_ENABLED(CONFIG_PPC_PMAC)) + fixup_device_tree_pmac(); + + if (IS_ENABLED(CONFIG_PPC_PMAC) && IS_ENABLED(CONFIG_PPC64)) + fixup_device_tree_pmac64(); + + if (IS_ENABLED(CONFIG_PPC_EFIKA)) + fixup_device_tree_efika(); + + if (IS_ENABLED(CONFIG_PPC_PASEMI_NEMO)) + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 311890d71c4c..3090b97258ae 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,8 +13,13 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null -if [ $? -eq 0 ] +has_renamed_memintrinsics() +{ + grep -q "^CONFIG_KASAN=y$" "${KCONFIG_CONFIG}" && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" "${KCONFIG_CONFIG}" +} + +if has_renamed_memintrinsics then MEM_FUNCS="__memcpy __memset" else @@ -37,25 +42,24 @@ check_section() { file=$1 section=$2 - size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") + size=$(objdump -h -j "$section" "$file" 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") size=${size:-0} - if [ $size -ne 0 ]; then + if [ "$size" -ne 0 ]; then ERROR=1 echo "Error: Section $section not empty in prom_init.c" >&2 fi } -for UNDEF in $($NM -u $OBJ | awk '{print $2}') +for UNDEF in $($NM -u "$OBJ" | awk '{print $2}') do # On 64-bit nm gives us the function descriptors, which have # a leading . on the name, so strip it off here. UNDEF="${UNDEF#.}" - if [ $KBUILD_VERBOSE ]; then - if [ $KBUILD_VERBOSE -ne 0 ]; then - echo "Checking prom_init.o symbol '$UNDEF'" - fi - fi + case "$KBUILD_VERBOSE" in + *1*) + echo "Checking prom_init.o symbol '$UNDEF'" ;; + esac OK=0 for WHITE in $WHITELIST @@ -83,8 +87,8 @@ do fi done -check_section $OBJ .data -check_section $OBJ .bss -check_section $OBJ .init.data +check_section "$OBJ" .data +check_section "$OBJ" .bss +check_section "$OBJ" .init.data exit $ERROR diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index eafe5f0f6289..4171a5727197 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <linux/regset.h> + /* * Set of msr bits that gdb can change on behalf of a process. */ @@ -55,6 +57,10 @@ enum powerpc_regset { REGSET_TAR, /* TAR register */ REGSET_EBB, /* EBB registers */ REGSET_PMR, /* Performance Monitor Registers */ + REGSET_DEXCR, /* DEXCR registers */ +#ifdef CONFIG_CHECKPOINT_RESTORE + REGSET_HASHKEYR, /* HASHKEYR register */ +#endif #endif #ifdef CONFIG_PPC_MEM_KEYS REGSET_PKEY, /* AMR register */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 210ea834e603..447bff87fd21 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -12,7 +12,7 @@ void flush_tmregs_to_thread(struct task_struct *tsk) { /* * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). + * its thread_struct during __switch_to(). * * A reclaim flushes ALL the state or if not in TM save TM SPRs * in the appropriate thread structures from live. diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2087a785f05f..0310f9097e39 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, static int ppr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + if (!target->thread.regs) + return -EINVAL; + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } @@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + if (!target->thread.regs) + return -EINVAL; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.regs->ppr, 0, sizeof(u64)); } @@ -448,7 +454,60 @@ static int pmu_set(struct task_struct *target, const struct user_regset *regset, 5 * sizeof(unsigned long)); return ret; } -#endif + +static int dexcr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return regset->n; +} + +static int dexcr_get(struct task_struct *target, const struct user_regset *regset, + struct membuf to) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr)); + + /* + * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably + * change it between CPUs of the same guest. + */ + return membuf_store(&to, (u64)lower_32_bits(mfspr(SPRN_HDEXCR_RO))); +} + +#ifdef CONFIG_CHECKPOINT_RESTORE +static int hashkeyr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return regset->n; +} + +static int hashkeyr_get(struct task_struct *target, const struct user_regset *regset, + struct membuf to) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return membuf_store(&to, target->thread.hashkeyr); +} + +static int hashkeyr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.hashkeyr, + 0, sizeof(unsigned long)); +} +#endif /* CONFIG_CHECKPOINT_RESTORE */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_MEM_KEYS static int pkey_active(struct task_struct *target, const struct user_regset *regset) @@ -509,110 +568,122 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset static const struct user_regset native_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, + USER_REGSET_NOTE_TYPE(PPC_VMX), .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_VSX [REGSET_VSX] = { - .core_note_type = NT_PPC_VSX, .n = 32, + USER_REGSET_NOTE_TYPE(PPC_VSX), .n = 32, .size = sizeof(double), .align = sizeof(double), .active = vsr_active, .regset_get = vsr_get, .set = vsr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, + USER_REGSET_NOTE_TYPE(PPC_SPE), .n = 35, .size = sizeof(u32), .align = sizeof(u32), .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CGPR), .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set }, [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CFPR), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVMX), .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVSX), .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + USER_REGSET_NOTE_TYPE(PPC_TM_SPR), .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CTAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CPPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CDSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_PPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_DSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + USER_REGSET_NOTE_TYPE(PPC_EBB), .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, [REGSET_PMR] = { - .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + USER_REGSET_NOTE_TYPE(PPC_PMU), .n = ELF_NPMU, .size = sizeof(u64), .align = sizeof(u64), .active = pmu_active, .regset_get = pmu_get, .set = pmu_set }, + [REGSET_DEXCR] = { + USER_REGSET_NOTE_TYPE(PPC_DEXCR), .n = ELF_NDEXCR, + .size = sizeof(u64), .align = sizeof(u64), + .active = dexcr_active, .regset_get = dexcr_get + }, +#ifdef CONFIG_CHECKPOINT_RESTORE + [REGSET_HASHKEYR] = { + USER_REGSET_NOTE_TYPE(PPC_HASHKEYR), .n = ELF_NHASHKEYR, + .size = sizeof(u64), .align = sizeof(u64), + .active = hashkeyr_active, .regset_get = hashkeyr_get, .set = hashkeyr_set + }, +#endif #endif #ifdef CONFIG_PPC_MEM_KEYS [REGSET_PKEY] = { - .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + USER_REGSET_NOTE_TYPE(PPC_PKEY), .n = ELF_NPKEY, .size = sizeof(u64), .align = sizeof(u64), .active = pkey_active, .regset_get = pkey_get, .set = pkey_set }, @@ -640,69 +711,86 @@ int gpr32_get_common(struct task_struct *target, return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32)); } -int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, - unsigned long *regs) +static int gpr32_set_common_kernel(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, unsigned long *regs) { const compat_ulong_t *k = kbuf; + + pos /= sizeof(compat_ulong_t); + count /= sizeof(compat_ulong_t); + + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + + if (count > 0 && pos == PT_MSR) { + set_user_msr(target, *k++); + ++pos; + --count; + } + + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + + if (count > 0 && pos == PT_TRAP) { + set_user_trap(target, *k++); + ++pos; + --count; + } + + kbuf = k; + pos *= sizeof(compat_ulong_t); + count *= sizeof(compat_ulong_t); + user_regset_copyin_ignore(&pos, &count, &kbuf, NULL, + (PT_TRAP + 1) * sizeof(compat_ulong_t), -1); + return 0; +} + +static int gpr32_set_common_user(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void __user *ubuf, unsigned long *regs) +{ const compat_ulong_t __user *u = ubuf; + const void *kbuf = NULL; compat_ulong_t reg; - if (!kbuf && !user_read_access_begin(u, count)) + if (!user_read_access_begin(u, count)) return -EFAULT; pos /= sizeof(reg); count /= sizeof(reg); - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - + for (; count > 0 && pos < PT_MSR; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; + } if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; } - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - unsafe_get_user(reg, u++, Efault); + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + unsafe_get_user(reg, u++, Efault); if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } - if (!kbuf) - user_read_access_end(); + user_read_access_end(); - kbuf = k; ubuf = u; pos *= sizeof(reg); count *= sizeof(reg); @@ -715,6 +803,18 @@ Efault: return -EFAULT; } +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + if (kbuf) + return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs); + else + return gpr32_set_common_user(target, regset, pos, count, ubuf, regs); +} + static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) @@ -743,92 +843,92 @@ static int gpr32_set(struct task_struct *target, */ static const struct user_regset compat_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG, .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), .regset_get = gpr32_get, .set = gpr32_set }, [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, + USER_REGSET_NOTE_TYPE(PPC_VMX), .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, + USER_REGSET_NOTE_TYPE(PPC_SPE), .n = 35, .size = sizeof(u32), .align = sizeof(u32), .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CGPR), .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .active = tm_cgpr_active, .regset_get = tm_cgpr32_get, .set = tm_cgpr32_set }, [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CFPR), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVMX), .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVSX), .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + USER_REGSET_NOTE_TYPE(PPC_TM_SPR), .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CTAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CPPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CDSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_PPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_DSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + USER_REGSET_NOTE_TYPE(PPC_EBB), .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 5d7a72b41ae7..c6997df63287 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -9,10 +10,6 @@ * * Modified by Cort Dougan (cort@hq.fsmlabs.com) * and Paul Mackerras (paulus@samba.org). - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/regset.h> @@ -218,7 +215,7 @@ static int do_seccomp(struct pt_regs *regs) * have already loaded -ENOSYS into r3, or seccomp has put * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). */ - if (__secure_computing(NULL)) + if (__secure_computing()) return -1; /* diff --git a/arch/powerpc/kernel/rethook.c b/arch/powerpc/kernel/rethook.c new file mode 100644 index 000000000000..5f5f47ae82cf --- /dev/null +++ b/arch/powerpc/kernel/rethook.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PowerPC implementation of rethook. This depends on kprobes. + */ + +#include <linux/kprobes.h> +#include <linux/rethook.h> + +/* + * Function return trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +asm(".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" + "nop\n" + "blr\n" + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n"); + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int trampoline_rethook_handler(struct kprobe *p, struct pt_regs *regs) +{ + return !rethook_trampoline_handler(regs, regs->gpr[1]); +} +NOKPROBE_SYMBOL(trampoline_rethook_handler); + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->link; + rh->frame = regs->gpr[1]; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, unsigned long orig_ret_address) +{ + /* + * We get here through one of two paths: + * 1. by taking a trap -> kprobe_handler() -> here + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here + * + * When going back through (1), we need regs->nip to be setup properly + * as it is used to determine the return address from the trap. + * For (2), since nip is not honoured with optprobes, we instead setup + * the link register properly so that the subsequent 'blr' in + * arch_rethook_trampoline jumps back to the right instruction. + * + * For nip, we should set the address to the previous instruction since + * we end up emulating it in kprobe_handler(), which increments the nip + * again. + */ + regs_set_return_ip(regs, orig_ret_address - 4); + regs->link = orig_ret_address; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &arch_rethook_trampoline, + .pre_handler = trampoline_rethook_handler +}; + +/* rethook initializer */ +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 081b2b741a8c..f38df72e64b8 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -287,9 +287,9 @@ static ssize_t ppc_rtas_poweron_write(struct file *file, rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); if (error) printk(KERN_WARNING "error: setting poweron time returned: %s\n", ppc_rtas_process_error(error)); @@ -350,9 +350,9 @@ static ssize_t ppc_rtas_clock_write(struct file *file, return error; rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); if (error) printk(KERN_WARNING "error: setting the clock returned: %s\n", ppc_rtas_process_error(error)); @@ -362,7 +362,7 @@ static ssize_t ppc_rtas_clock_write(struct file *file, static int ppc_rtas_clock_show(struct seq_file *m, void *v) { int ret[8]; - int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + int error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); if (error) { printk(KERN_WARNING "error: reading the clock returned: %s\n", @@ -385,7 +385,7 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v) { int i,j; int state, error; - int get_sensor_state = rtas_token("get-sensor-state"); + int get_sensor_state = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); @@ -708,8 +708,8 @@ static ssize_t ppc_rtas_tone_freq_write(struct file *file, return error; rtas_tone_frequency = freq; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_FREQUENCY, 0, freq); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); if (error) printk(KERN_WARNING "error: setting tone frequency returned: %s\n", ppc_rtas_process_error(error)); @@ -736,8 +736,8 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file, volume = 100; rtas_tone_volume = volume; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_VOLUME, 0, volume); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_VOLUME, 0, volume); if (error) printk(KERN_WARNING "error: setting tone volume returned: %s\n", ppc_rtas_process_error(error)); @@ -752,6 +752,8 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) /** * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * @m: seq_file output target. + * @v: Unused. * * Base + size description of a range of RTAS-addressable memory set * aside for user space to use as work area(s) for certain RTAS diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 5a31d1829bca..6996214532bd 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -21,7 +21,7 @@ time64_t __init rtas_get_boot_time(void) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -53,7 +53,7 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -90,7 +90,7 @@ int rtas_set_rtc_time(struct rtc_time *tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, 0); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index deded51a7978..8d81c1e7a8db 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -9,12 +9,17 @@ #define pr_fmt(fmt) "rtas: " fmt +#include <linux/bsearch.h> #include <linux/capability.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> +#include <linux/kconfig.h> #include <linux/kernel.h> +#include <linux/lockdep.h> #include <linux/memblock.h> +#include <linux/mutex.h> +#include <linux/nospec.h> #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/reboot.h> @@ -26,6 +31,7 @@ #include <linux/syscalls.h> #include <linux/types.h> #include <linux/uaccess.h> +#include <linux/xarray.h> #include <asm/delay.h> #include <asm/firmware.h> @@ -33,43 +39,733 @@ #include <asm/machdep.h> #include <asm/mmu.h> #include <asm/page.h> +#include <asm/rtas-work-area.h> #include <asm/rtas.h> #include <asm/time.h> +#include <asm/trace.h> #include <asm/udbg.h> +struct rtas_filter { + /* Indexes into the args buffer, -1 if not used */ + const int buf_idx1; + const int size_idx1; + const int buf_idx2; + const int size_idx2; + /* + * Assumed buffer size per the spec if the function does not + * have a size parameter, e.g. ibm,errinjct. 0 if unused. + */ + const int fixed_size; +}; + +/** + * struct rtas_function - Descriptor for RTAS functions. + * + * @token: Value of @name if it exists under the /rtas node. + * @name: Function name. + * @filter: If non-NULL, invoking this function via the rtas syscall is + * generally allowed, and @filter describes constraints on the + * arguments. See also @banned_for_syscall_on_le. + * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed + * but specifically restricted on ppc64le. Such + * functions are believed to have no users on + * ppc64le, and we want to keep it that way. It does + * not make sense for this to be set when @filter + * is NULL. + * @lock: Pointer to an optional dedicated per-function mutex. This + * should be set for functions that require multiple calls in + * sequence to complete a single operation, and such sequences + * will disrupt each other if allowed to interleave. Users of + * this function are required to hold the associated lock for + * the duration of the call sequence. Add an explanatory + * comment to the function table entry if setting this member. + */ +struct rtas_function { + s32 token; + const bool banned_for_syscall_on_le:1; + const char * const name; + const struct rtas_filter *filter; + struct mutex *lock; +}; + +/* + * Per-function locks for sequence-based RTAS functions. + */ +static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock); +static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock); +DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); +DEFINE_MUTEX(rtas_ibm_get_vpd_lock); +DEFINE_MUTEX(rtas_ibm_get_indices_lock); +DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); +DEFINE_MUTEX(rtas_ibm_receive_hvpipe_msg_lock); +DEFINE_MUTEX(rtas_ibm_send_hvpipe_msg_lock); + +static struct rtas_function rtas_function_table[] __ro_after_init = { + [RTAS_FNIDX__CHECK_EXCEPTION] = { + .name = "check-exception", + }, + [RTAS_FNIDX__DISPLAY_CHARACTER] = { + .name = "display-character", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__EVENT_SCAN] = { + .name = "event-scan", + }, + [RTAS_FNIDX__FREEZE_TIME_BASE] = { + .name = "freeze-time-base", + }, + [RTAS_FNIDX__GET_POWER_LEVEL] = { + .name = "get-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_SENSOR_STATE] = { + .name = "get-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_TERM_CHAR] = { + .name = "get-term-char", + }, + [RTAS_FNIDX__GET_TIME_OF_DAY] = { + .name = "get-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = { + .name = "ibm,activate-firmware", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ as of v2.13 doesn't explicitly impose any + * restriction, but this typically requires multiple + * calls before success, and there's no reason to + * allow sequences to interleave. + */ + .lock = &rtas_ibm_activate_firmware_lock, + }, + [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { + .name = "ibm,cbe-start-ptcal", + }, + [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = { + .name = "ibm,cbe-stop-ptcal", + }, + [RTAS_FNIDX__IBM_CHANGE_MSI] = { + .name = "ibm,change-msi", + }, + [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = { + .name = "ibm,close-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = { + .name = "ibm,configure-bridge", + }, + [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = { + .name = "ibm,configure-connector", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = { + .name = "ibm,configure-kernel-dump", + }, + [RTAS_FNIDX__IBM_CONFIGURE_PE] = { + .name = "ibm,configure-pe", + }, + [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = { + .name = "ibm,create-pe-dma-window", + }, + [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = { + .name = "ibm,display-message", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ERRINJCT] = { + .name = "ibm,errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 1024, + }, + }, + [RTAS_FNIDX__IBM_EXTI2C] = { + .name = "ibm,exti2c", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = { + .name = "ibm,get-config-addr-info", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = { + .name = "ibm,get-config-addr-info2", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = { + .name = "ibm,get-dynamic-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS + * must not call ibm,get-dynamic-sensor-state with + * different inputs until a non-retry status has been + * returned. + */ + .lock = &rtas_ibm_get_dynamic_sensor_state_lock, + }, + [RTAS_FNIDX__IBM_GET_INDICES] = { + .name = "ibm,get-indices", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not + * interleave ibm,get-indices call sequences with + * different inputs. + */ + .lock = &rtas_ibm_get_indices_lock, + }, + [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { + .name = "ibm,get-rio-topology", + }, + [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = { + .name = "ibm,get-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = 2, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_VPD] = { + .name = "ibm,get-vpd", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = 2, + }, + /* + * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences + * should not be allowed to interleave. + */ + .lock = &rtas_ibm_get_vpd_lock, + }, + [RTAS_FNIDX__IBM_GET_XIVE] = { + .name = "ibm,get-xive", + }, + [RTAS_FNIDX__IBM_INT_OFF] = { + .name = "ibm,int-off", + }, + [RTAS_FNIDX__IBM_INT_ON] = { + .name = "ibm,int-on", + }, + [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = { + .name = "ibm,io-quiesce-ack", + }, + [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = { + .name = "ibm,lpar-perftools", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow + * only one call sequence in progress at a time. + */ + .lock = &rtas_ibm_lpar_perftools_lock, + }, + [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { + .name = "ibm,manage-flash-image", + }, + [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = { + .name = "ibm,manage-storage-preservation", + }, + [RTAS_FNIDX__IBM_NMI_INTERLOCK] = { + .name = "ibm,nmi-interlock", + }, + [RTAS_FNIDX__IBM_NMI_REGISTER] = { + .name = "ibm,nmi-register", + }, + [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = { + .name = "ibm,open-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = { + .name = "ibm,open-sriov-allow-unfreeze", + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = { + .name = "ibm,open-sriov-map-pe-number", + }, + [RTAS_FNIDX__IBM_OS_TERM] = { + .name = "ibm,os-term", + }, + [RTAS_FNIDX__IBM_PARTNER_CONTROL] = { + .name = "ibm,partner-control", + }, + [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = { + .name = "ibm,physical-attestation", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * This follows a sequence-based pattern similar to + * ibm,get-vpd et al. Since PAPR+ restricts + * interleaving call sequences for other functions of + * this style, assume the restriction applies here, + * even though it's not explicit in the spec. + */ + .lock = &rtas_ibm_physical_attestation_lock, + }, + [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { + .name = "ibm,platform-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 4, .size_idx1 = 5, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent + * sequences of ibm,platform-dump are allowed if they + * are operating on different dump tags. So leave the + * lock pointer unset for now. This may need + * reconsideration if kernel-internal users appear. + */ + }, + [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { + .name = "ibm,power-off-ups", + }, + [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = { + .name = "ibm,query-interrupt-source-number", + }, + [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = { + .name = "ibm,query-pe-dma-window", + }, + [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = { + .name = "ibm,read-pci-config", + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = { + .name = "ibm,read-slot-reset-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = { + .name = "ibm,read-slot-reset-state2", + }, + [RTAS_FNIDX__IBM_RECEIVE_HVPIPE_MSG] { + .name = "ibm,receive-hvpipe-msg", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.32.1 + */ + .lock = &rtas_ibm_receive_hvpipe_msg_lock, + }, + [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { + .name = "ibm,remove-pe-dma-window", + }, + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = { + /* + * Note: PAPR+ v2.13 7.3.31.4.1 spells this as + * "ibm,reset-pe-dma-windows" (plural), but RTAS + * implementations use the singular form in practice. + */ + .name = "ibm,reset-pe-dma-window", + }, + [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { + .name = "ibm,scan-log-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SEND_HVPIPE_MSG] { + .name = "ibm,send-hvpipe-msg", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.32.2 + */ + .lock = &rtas_ibm_send_hvpipe_msg_lock, + }, + [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = { + .name = "ibm,set-dynamic-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call + * this function with different inputs until a + * non-retry status has been returned. + */ + .lock = &rtas_ibm_set_dynamic_indicator_lock, + }, + [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { + .name = "ibm,set-eeh-option", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_SLOT_RESET] = { + .name = "ibm,set-slot-reset", + }, + [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = { + .name = "ibm,set-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_XIVE] = { + .name = "ibm,set-xive", + }, + [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = { + .name = "ibm,slot-error-detail", + }, + [RTAS_FNIDX__IBM_SUSPEND_ME] = { + .name = "ibm,suspend-me", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = { + .name = "ibm,tune-dma-parms", + }, + [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = { + .name = "ibm,update-flash-64-and-reboot", + }, + [RTAS_FNIDX__IBM_UPDATE_NODES] = { + .name = "ibm,update-nodes", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = { + .name = "ibm,update-properties", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = { + .name = "ibm,validate-flash-image", + }, + [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = { + .name = "ibm,write-pci-config", + }, + [RTAS_FNIDX__NVRAM_FETCH] = { + .name = "nvram-fetch", + }, + [RTAS_FNIDX__NVRAM_STORE] = { + .name = "nvram-store", + }, + [RTAS_FNIDX__POWER_OFF] = { + .name = "power-off", + }, + [RTAS_FNIDX__PUT_TERM_CHAR] = { + .name = "put-term-char", + }, + [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = { + .name = "query-cpu-stopped-state", + }, + [RTAS_FNIDX__READ_PCI_CONFIG] = { + .name = "read-pci-config", + }, + [RTAS_FNIDX__RTAS_LAST_ERROR] = { + .name = "rtas-last-error", + }, + [RTAS_FNIDX__SET_INDICATOR] = { + .name = "set-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_POWER_LEVEL] = { + .name = "set-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = { + .name = "set-time-for-power-on", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_OF_DAY] = { + .name = "set-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__START_CPU] = { + .name = "start-cpu", + }, + [RTAS_FNIDX__STOP_SELF] = { + .name = "stop-self", + }, + [RTAS_FNIDX__SYSTEM_REBOOT] = { + .name = "system-reboot", + }, + [RTAS_FNIDX__THAW_TIME_BASE] = { + .name = "thaw-time-base", + }, + [RTAS_FNIDX__WRITE_PCI_CONFIG] = { + .name = "write-pci-config", + }, +}; + +#define for_each_rtas_function(funcp) \ + for (funcp = &rtas_function_table[0]; \ + funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \ + ++funcp) + +/* + * Nearly all RTAS calls need to be serialized. All uses of the + * default rtas_args block must hold rtas_lock. + * + * Exceptions to the RTAS serialization requirement (e.g. stop-self) + * must use a separate rtas_args structure. + */ +static DEFINE_RAW_SPINLOCK(rtas_lock); +static struct rtas_args rtas_args; + +/** + * rtas_function_token() - RTAS function token lookup. + * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. + * + * Context: Any context. + * Return: the token value for the function if implemented by this platform, + * otherwise RTAS_UNKNOWN_SERVICE. + */ +s32 rtas_function_token(const rtas_fn_handle_t handle) +{ + const size_t index = handle.index; + const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table); + + if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index)) + return RTAS_UNKNOWN_SERVICE; + /* + * Various drivers attempt token lookups on non-RTAS + * platforms. + */ + if (!rtas.dev) + return RTAS_UNKNOWN_SERVICE; + + return rtas_function_table[index].token; +} +EXPORT_SYMBOL_GPL(rtas_function_token); + +static int rtas_function_cmp(const void *a, const void *b) +{ + const struct rtas_function *f1 = a; + const struct rtas_function *f2 = b; + + return strcmp(f1->name, f2->name); +} + +/* + * Boot-time initialization of the function table needs the lookup to + * return a non-const-qualified object. Use rtas_name_to_function() + * in all other contexts. + */ +static struct rtas_function *__rtas_name_to_function(const char *name) +{ + const struct rtas_function key = { + .name = name, + }; + struct rtas_function *found; + + found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table), + sizeof(rtas_function_table[0]), rtas_function_cmp); + + return found; +} + +static const struct rtas_function *rtas_name_to_function(const char *name) +{ + return __rtas_name_to_function(name); +} + +static DEFINE_XARRAY(rtas_token_to_function_xarray); + +static int __init rtas_token_to_function_xarray_init(void) +{ + const struct rtas_function *func; + int err = 0; + + for_each_rtas_function(func) { + const s32 token = func->token; + + if (token == RTAS_UNKNOWN_SERVICE) + continue; + + err = xa_err(xa_store(&rtas_token_to_function_xarray, + token, (void *)func, GFP_KERNEL)); + if (err) + break; + } + + return err; +} +arch_initcall(rtas_token_to_function_xarray_init); + +/* + * For use by sys_rtas(), where the token value is provided by user + * space and we don't want to warn on failed lookups. + */ +static const struct rtas_function *rtas_token_to_function_untrusted(s32 token) +{ + return xa_load(&rtas_token_to_function_xarray, token); +} + +/* + * Reverse lookup for deriving the function descriptor from a + * known-good token value in contexts where the former is not already + * available. @token must be valid, e.g. derived from the result of a + * prior lookup against the function table. + */ +static const struct rtas_function *rtas_token_to_function(s32 token) +{ + const struct rtas_function *func; + + if (WARN_ONCE(token < 0, "invalid token %d", token)) + return NULL; + + func = rtas_token_to_function_untrusted(token); + if (func) + return func; + /* + * Fall back to linear scan in case the reverse mapping hasn't + * been initialized yet. + */ + if (xa_empty(&rtas_token_to_function_xarray)) { + for_each_rtas_function(func) { + if (func->token == token) + return func; + } + } + + WARN_ONCE(true, "unexpected failed lookup for token %d", token); + return NULL; +} + /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); -static inline void do_enter_rtas(unsigned long args) +static void __do_enter_rtas(struct rtas_args *args) { - unsigned long msr; + enter_rtas(__pa(args)); + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} +static void __do_enter_rtas_trace(struct rtas_args *args) +{ + const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token)); + + /* + * If there is a per-function lock, it must be held by the + * caller. + */ + if (func->lock) + lockdep_assert_held(func->lock); + + if (args == &rtas_args) + lockdep_assert_held(&rtas_lock); + + trace_rtas_input(args, func->name); + trace_rtas_ll_entry(args); + + __do_enter_rtas(args); + + trace_rtas_ll_exit(args); + trace_rtas_output(args, func->name); +} + +static void do_enter_rtas(struct rtas_args *args) +{ + const unsigned long msr = mfmsr(); + /* + * Situations where we want to skip any active tracepoints for + * safety reasons: + * + * 1. The last code executed on an offline CPU as it stops, + * i.e. we're about to call stop-self. The tracepoints' + * function name lookup uses xarray, which uses RCU, which + * isn't valid to call on an offline CPU. Any events + * emitted on an offline CPU will be discarded anyway. + * + * 2. In real mode, as when invoking ibm,nmi-interlock from + * the pseries MCE handler. We cannot count on trace + * buffers or the entries in rtas_token_to_function_xarray + * to be contained in the RMO. + */ + const unsigned long mask = MSR_IR | MSR_DR; + const bool can_trace = likely(cpu_online(raw_smp_processor_id()) && + (msr & mask) == mask); /* * Make sure MSR[RI] is currently enabled as it will be forced later * in enter_rtas. */ - msr = mfmsr(); BUG_ON(!(msr & MSR_RI)); BUG_ON(!irqs_disabled()); hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */ - enter_rtas(args); - - srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ + if (can_trace) + __do_enter_rtas_trace(args); + else + __do_enter_rtas(args); } -struct rtas_t rtas = { - .lock = __ARCH_SPIN_LOCK_UNLOCKED -}; -EXPORT_SYMBOL(rtas); +struct rtas_t rtas; DEFINE_SPINLOCK(rtas_data_buf_lock); -EXPORT_SYMBOL(rtas_data_buf_lock); +EXPORT_SYMBOL_GPL(rtas_data_buf_lock); -char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; -EXPORT_SYMBOL(rtas_data_buf); +char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K); +EXPORT_SYMBOL_GPL(rtas_data_buf); unsigned long rtas_rmo_buf; @@ -78,29 +774,7 @@ unsigned long rtas_rmo_buf; * This is done like this so rtas_flash can be a module. */ void (*rtas_flash_term_hook)(int); -EXPORT_SYMBOL(rtas_flash_term_hook); - -/* RTAS use home made raw locking instead of spin_lock_irqsave - * because those can be called from within really nasty contexts - * such as having the timebase stopped which would lockup with - * normal locks and spinlock debugging enabled - */ -static unsigned long lock_rtas(void) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - arch_spin_lock(&rtas.lock); - return flags; -} - -static void unlock_rtas(unsigned long flags) -{ - arch_spin_unlock(&rtas.lock); - local_irq_restore(flags); - preempt_enable(); -} +EXPORT_SYMBOL_GPL(rtas_flash_term_hook); /* * call_rtas_display_status and call_rtas_display_status_delay @@ -109,14 +783,14 @@ static void unlock_rtas(unsigned long flags) */ static void call_rtas_display_status(unsigned char c) { - unsigned long s; + unsigned long flags; if (!rtas.base) return; - s = lock_rtas(); - rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c); - unlock_rtas(s); + raw_spin_lock_irqsave(&rtas_lock, flags); + rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c); + raw_spin_unlock_irqrestore(&rtas_lock, flags); } static void call_rtas_display_status_delay(char c) @@ -124,7 +798,7 @@ static void call_rtas_display_status_delay(char c) static int pending_newline = 0; /* did last write end with unprinted newline? */ static int width = 16; - if (c == '\n') { + if (c == '\n') { while (width-- > 0) call_rtas_display_status(' '); width = 16; @@ -134,7 +808,7 @@ static void call_rtas_display_status_delay(char c) if (pending_newline) { call_rtas_display_status('\r'); call_rtas_display_status('\n'); - } + } pending_newline = 0; if (width--) { call_rtas_display_status(c); @@ -148,66 +822,6 @@ void __init udbg_init_rtas_panel(void) udbg_putc = call_rtas_display_status_delay; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - -/* If you think you're dying before early_init_dt_scan_rtas() does its - * work, you can hard code the token values for your firmware here and - * hardcode rtas.base/entry etc. - */ -static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE; -static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE; - -static void udbg_rtascon_putc(char c) -{ - int tries; - - if (!rtas.base) - return; - - /* Add CRs before LFs */ - if (c == '\n') - udbg_rtascon_putc('\r'); - - /* if there is more than one character to be displayed, wait a bit */ - for (tries = 0; tries < 16; tries++) { - if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0) - break; - udelay(1000); - } -} - -static int udbg_rtascon_getc_poll(void) -{ - int c; - - if (!rtas.base) - return -1; - - if (rtas_call(rtas_getchar_token, 0, 2, &c)) - return -1; - - return c; -} - -static int udbg_rtascon_getc(void) -{ - int c; - - while ((c = udbg_rtascon_getc_poll()) == -1) - ; - - return c; -} - - -void __init udbg_init_rtas_console(void) -{ - udbg_putc = udbg_rtascon_putc; - udbg_getc = udbg_rtascon_getc; - udbg_getc_poll = udbg_rtascon_getc_poll; -} -#endif /* CONFIG_UDBG_RTAS_CONSOLE */ - void rtas_progress(char *s, unsigned short hex) { struct device_node *root; @@ -240,8 +854,8 @@ void rtas_progress(char *s, unsigned short hex) "ibm,display-truncation-length", NULL); of_node_put(root); } - display_character = rtas_token("display-character"); - set_indicator = rtas_token("set-indicator"); + display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER); + set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR); } if (display_character == RTAS_UNKNOWN_SERVICE) { @@ -274,7 +888,7 @@ void rtas_progress(char *s, unsigned short hex) else rtas_call(display_character, 1, 1, NULL, '\r'); } - + if (row_width) width = row_width[current_line]; else @@ -294,9 +908,9 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); return; } - + /* RTAS wants CR-LF, not just LF */ - + if (*os == '\n') { rtas_call(display_character, 1, 1, NULL, '\r'); rtas_call(display_character, 1, 1, NULL, '\n'); @@ -306,7 +920,7 @@ void rtas_progress(char *s, unsigned short hex) */ rtas_call(display_character, 1, 1, NULL, *os); } - + if (row_width) width = row_width[current_line]; else @@ -315,34 +929,44 @@ void rtas_progress(char *s, unsigned short hex) width--; rtas_call(display_character, 1, 1, NULL, *os); } - + os++; - + /* if we overwrite the screen length */ if (width <= 0) while ((*os != 0) && (*os != '\n') && (*os != '\r')) os++; } - + spin_unlock(&progress_lock); } -EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ +EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { + const struct rtas_function *func; const __be32 *tokp; + if (rtas.dev == NULL) return RTAS_UNKNOWN_SERVICE; + + func = rtas_name_to_function(service); + if (func) + return func->token; + /* + * The caller is looking up a name that is not known to be an + * RTAS function. Either it's a function that needs to be + * added to the table, or they're misusing rtas_token() to + * access non-function properties of the /rtas node. Warn and + * fall back to the legacy behavior. + */ + WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n", + service); + tokp = of_get_property(rtas.dev, service, NULL); return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; } -EXPORT_SYMBOL(rtas_token); - -int rtas_service_present(const char *service) -{ - return rtas_token(service) != RTAS_UNKNOWN_SERVICE; -} -EXPORT_SYMBOL(rtas_service_present); +EXPORT_SYMBOL_GPL(rtas_token); #ifdef CONFIG_RTAS_ERROR_LOGGING @@ -357,7 +981,6 @@ int rtas_get_error_log_max(void) { return rtas_error_log_max; } -EXPORT_SYMBOL(rtas_get_error_log_max); static void __init init_error_log_max(void) { @@ -381,39 +1004,41 @@ static void __init init_error_log_max(void) static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; -static int rtas_last_error_token; /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text * might go stale if there are any other intervening rtas calls, * this routine must be called atomically with whatever produced - * the error (i.e. with rtas.lock still held from the previous call). + * the error (i.e. with rtas_lock still held from the previous call). */ static char *__fetch_rtas_last_error(char *altbuf) { + const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR); struct rtas_args err_args, save_args; u32 bufsz; char *buf = NULL; - if (rtas_last_error_token == -1) + lockdep_assert_held(&rtas_lock); + + if (token == -1) return NULL; bufsz = rtas_get_error_log_max(); - err_args.token = cpu_to_be32(rtas_last_error_token); + err_args.token = cpu_to_be32(token); err_args.nargs = cpu_to_be32(2); err_args.nret = cpu_to_be32(1); err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf)); err_args.args[1] = cpu_to_be32(bufsz); err_args.args[2] = 0; - save_args = rtas.args; - rtas.args = err_args; + save_args = rtas_args; + rtas_args = err_args; - do_enter_rtas(__pa(&rtas.args)); + do_enter_rtas(&rtas_args); - err_args = rtas.args; - rtas.args = save_args; + err_args = rtas_args; + rtas_args = save_args; /* Log the error in the unlikely case that there was one. */ if (unlikely(err_args.args[2] == 0)) { @@ -425,7 +1050,7 @@ static char *__fetch_rtas_last_error(char *altbuf) buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); } if (buf) - memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); + memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); } return buf; @@ -457,9 +1082,26 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - do_enter_rtas(__pa(args)); + do_enter_rtas(args); } +/** + * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization. + * @args: RTAS parameter block to be used for the call, must obey RTAS addressing + * constraints. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_function_token(). + * + * This function is similar to rtas_call(), but must be used with a + * limited set of RTAS calls specifically exempted from the general + * requirement that only one RTAS call may be in progress at any + * time. Examples include stop-self and ibm,nmi-interlock. + */ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) { va_list list; @@ -469,8 +1111,11 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, va_end(list); } -static int ibm_open_errinjct_token; -static int ibm_errinjct_token; +static bool token_is_restricted_errinjct(s32 token) +{ + return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) || + token == rtas_function_token(RTAS_FN_IBM_ERRINJCT); +} /** * rtas_call() - Invoke an RTAS firmware function. @@ -481,7 +1126,7 @@ static int ibm_errinjct_token; * @....: List of @nargs input parameters. * * Invokes the RTAS function indicated by @token, which the caller - * should obtain via rtas_token(). + * should obtain via rtas_function_token(). * * The @nargs and @nret arguments must match the number of input and * output parameters specified for the RTAS function. @@ -532,17 +1177,18 @@ static int ibm_errinjct_token; */ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { + struct pin_cookie cookie; va_list list; int i; - unsigned long s; - struct rtas_args *rtas_args; + unsigned long flags; + struct rtas_args *args; char *buff_copy = NULL; int ret; if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) return -1; - if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + if (token_is_restricted_errinjct(token)) { /* * It would be nicer to not discard the error value * from security_locked_down(), but callers expect an @@ -557,26 +1203,28 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) return -1; } - s = lock_rtas(); + raw_spin_lock_irqsave(&rtas_lock, flags); + cookie = lockdep_pin_lock(&rtas_lock); /* We use the global rtas args buffer */ - rtas_args = &rtas.args; + args = &rtas_args; va_start(list, outputs); - va_rtas_call_unlocked(rtas_args, token, nargs, nret, list); + va_rtas_call_unlocked(args, token, nargs, nret, list); va_end(list); /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ - if (be32_to_cpu(rtas_args->rets[0]) == -1) + if (be32_to_cpu(args->rets[0]) == -1) buff_copy = __fetch_rtas_last_error(NULL); if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) - outputs[i] = be32_to_cpu(rtas_args->rets[i+1]); - ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0; + outputs[i] = be32_to_cpu(args->rets[i + 1]); + ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; - unlock_rtas(s); + lockdep_unpin_lock(&rtas_lock, cookie); + raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); @@ -585,7 +1233,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } return ret; } -EXPORT_SYMBOL(rtas_call); +EXPORT_SYMBOL_GPL(rtas_call); /** * rtas_busy_delay_time() - From an RTAS status value, calculate the @@ -623,7 +1271,47 @@ unsigned int rtas_busy_delay_time(int status) return ms; } -EXPORT_SYMBOL(rtas_busy_delay_time); + +/* + * Early boot fallback for rtas_busy_delay(). + */ +static bool __init rtas_busy_delay_early(int status) +{ + static size_t successive_ext_delays __initdata; + bool retry; + + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + /* + * In the unlikely case that we receive an extended + * delay status in early boot, the OS is probably not + * the cause, and there's nothing we can do to clear + * the condition. Best we can do is delay for a bit + * and hope it's transient. Lie to the caller if it + * seems like we're stuck in a retry loop. + */ + mdelay(1); + retry = true; + successive_ext_delays += 1; + if (successive_ext_delays > 1000) { + pr_err("too many extended delays, giving up\n"); + dump_stack(); + retry = false; + successive_ext_delays = 0; + } + break; + case RTAS_BUSY: + retry = true; + successive_ext_delays = 0; + break; + default: + retry = false; + successive_ext_delays = 0; + break; + } + + return retry; +} /** * rtas_busy_delay() - helper for RTAS busy and extended delay statuses @@ -643,11 +1331,17 @@ EXPORT_SYMBOL(rtas_busy_delay_time); * * false - @status is not @RTAS_BUSY nor an extended delay hint. The * caller is responsible for handling @status. */ -bool rtas_busy_delay(int status) +bool __ref rtas_busy_delay(int status) { unsigned int ms; bool ret; + /* + * Can't do timed sleeps before timekeeping is up. + */ + if (system_state < SYSTEM_SCHEDULING) + return rtas_busy_delay_early(status); + switch (status) { case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: ret = true; @@ -660,21 +1354,14 @@ bool rtas_busy_delay(int status) */ ms = clamp(ms, 1U, 1000U); /* - * The delay hint is an order-of-magnitude suggestion, not - * a minimum. It is fine, possibly even advantageous, for - * us to pause for less time than hinted. For small values, - * use usleep_range() to ensure we don't sleep much longer - * than actually needed. - * - * See Documentation/timers/timers-howto.rst for - * explanation of the threshold used here. In effect we use - * usleep_range() for 9900 and 9901, msleep() for - * 9902-9905. + * The delay hint is an order-of-magnitude suggestion, not a + * minimum. It is fine, possibly even advantageous, for us to + * pause for less time than hinted. To make sure pause time will + * not be way longer than requested independent of HZ + * configuration, use fsleep(). See fsleep() for details of + * used sleeping functions. */ - if (ms <= 20) - usleep_range(ms * 100, ms * 1000); - else - msleep(ms); + fsleep(ms * 1000); break; case RTAS_BUSY: ret = true; @@ -697,39 +1384,40 @@ bool rtas_busy_delay(int status) return ret; } -EXPORT_SYMBOL(rtas_busy_delay); +EXPORT_SYMBOL_GPL(rtas_busy_delay); -static int rtas_error_rc(int rtas_rc) +int rtas_error_rc(int rtas_rc) { int rc; switch (rtas_rc) { - case -1: /* Hardware Error */ - rc = -EIO; - break; - case -3: /* Bad indicator/domain/etc */ - rc = -EINVAL; - break; - case -9000: /* Isolation error */ - rc = -EFAULT; - break; - case -9001: /* Outstanding TCE/PTE */ - rc = -EEXIST; - break; - case -9002: /* No usable slot */ - rc = -ENODEV; - break; - default: - pr_err("%s: unexpected error %d\n", __func__, rtas_rc); - rc = -ERANGE; - break; + case RTAS_HARDWARE_ERROR: /* Hardware Error */ + rc = -EIO; + break; + case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case -9000: /* Isolation error */ + rc = -EFAULT; + break; + case -9001: /* Outstanding TCE/PTE */ + rc = -EEXIST; + break; + case -9002: /* No usable slot */ + rc = -ENODEV; + break; + default: + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); + rc = -ERANGE; + break; } return rc; } +EXPORT_SYMBOL_GPL(rtas_error_rc); int rtas_get_power_level(int powerdomain, int *level) { - int token = rtas_token("get-power-level"); + int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -742,11 +1430,11 @@ int rtas_get_power_level(int powerdomain, int *level) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_power_level); +EXPORT_SYMBOL_GPL(rtas_get_power_level); int rtas_set_power_level(int powerdomain, int level, int *setlevel) { - int token = rtas_token("set-power-level"); + int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -760,11 +1448,11 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_power_level); +EXPORT_SYMBOL_GPL(rtas_set_power_level); int rtas_get_sensor(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -778,11 +1466,11 @@ int rtas_get_sensor(int sensor, int index, int *state) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_sensor); +EXPORT_SYMBOL_GPL(rtas_get_sensor); int rtas_get_sensor_fast(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -821,11 +1509,10 @@ bool rtas_indicator_present(int token, int *maxindex) return false; } -EXPORT_SYMBOL(rtas_indicator_present); int rtas_set_indicator(int indicator, int index, int new_value) { - int token = rtas_token("set-indicator"); + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -839,15 +1526,15 @@ int rtas_set_indicator(int indicator, int index, int new_value) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_indicator); +EXPORT_SYMBOL_GPL(rtas_set_indicator); /* * Ignoring RTAS extended delay */ int rtas_set_indicator_fast(int indicator, int index, int new_value) { + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; - int token = rtas_token("set-indicator"); if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; @@ -889,10 +1576,11 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) */ int rtas_ibm_suspend_me(int *fw_status) { + int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME); int fwrc; int ret; - fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + fwrc = rtas_call(token, 0, 1, NULL); switch (fwrc) { case 0: @@ -925,7 +1613,7 @@ void __noreturn rtas_restart(char *cmd) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); pr_emerg("system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL)); for (;;); } @@ -935,7 +1623,7 @@ void rtas_power_off(void) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ pr_emerg("power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } @@ -945,16 +1633,18 @@ void __noreturn rtas_halt(void) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ pr_emerg("power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; -static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; +static bool ibm_extended_os_term; void rtas_os_term(char *str) { + s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); + static struct rtas_args args; int status; /* @@ -963,7 +1653,8 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) + + if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); @@ -974,8 +1665,8 @@ void rtas_os_term(char *str) * schedules. */ do { - status = rtas_call(ibm_os_term_token, 1, 1, NULL, - __pa(rtas_os_term_buf)); + rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf)); + status = be32_to_cpu(args.rets[0]); } while (rtas_busy_delay_time(status)); if (status != 0) @@ -995,19 +1686,22 @@ void rtas_os_term(char *str) */ void rtas_activate_firmware(void) { - int token; + int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE); int fwrc; - token = rtas_token("ibm,activate-firmware"); if (token == RTAS_UNKNOWN_SERVICE) { pr_notice("ibm,activate-firmware method unavailable\n"); return; } + mutex_lock(&rtas_ibm_activate_firmware_lock); + do { fwrc = rtas_call(token, 0, 1, NULL); } while (rtas_busy_delay(fwrc)); + mutex_unlock(&rtas_ibm_activate_firmware_lock); + if (fwrc) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } @@ -1063,56 +1757,12 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log * * Accordingly, we filter RTAS requests to check that the call is * permitted, and that provided pointers fall within the RMO buffer. - * The rtas_filters list contains an entry for each permitted call, - * with the indexes of the parameters which are expected to contain - * addresses and sizes of buffers allocated inside the RMO buffer. + * If a function is allowed to be invoked via the syscall, then its + * entry in the rtas_functions table points to a rtas_filter that + * describes its constraints, with the indexes of the parameters which + * are expected to contain addresses and sizes of buffers allocated + * inside the RMO buffer. */ -struct rtas_filter { - const char *name; - int token; - /* Indexes into the args buffer, -1 if not used */ - int buf_idx1; - int size_idx1; - int buf_idx2; - int size_idx2; - - int fixed_size; -}; - -static struct rtas_filter rtas_filters[] __ro_after_init = { - { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, - { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ - { "display-character", -1, -1, -1, -1, -1 }, - { "ibm,display-message", -1, 0, -1, -1, -1 }, - { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, - { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, - { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, - { "ibm,get-indices", -1, 2, 3, -1, -1 }, - { "get-power-level", -1, -1, -1, -1, -1 }, - { "get-sensor-state", -1, -1, -1, -1, -1 }, - { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, - { "get-time-of-day", -1, -1, -1, -1, -1 }, - { "ibm,get-vpd", -1, 0, -1, 1, 2 }, - { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ - { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, - { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, - { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, - { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, - { "set-indicator", -1, -1, -1, -1, -1 }, - { "set-power-level", -1, -1, -1, -1, -1 }, - { "set-time-for-power-on", -1, -1, -1, -1, -1 }, - { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, - { "set-time-of-day", -1, -1, -1, -1, -1 }, -#ifdef CONFIG_CPU_BIG_ENDIAN - { "ibm,suspend-me", -1, -1, -1, -1, -1 }, - { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, - { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, -#endif - { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, -}; static bool in_rmo_buf(u32 base, u32 end) { @@ -1123,84 +1773,84 @@ static bool in_rmo_buf(u32 base, u32 end) end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } -static bool block_rtas_call(int token, int nargs, +static bool block_rtas_call(const struct rtas_function *func, int nargs, struct rtas_args *args) { - int i; + const struct rtas_filter *f; + const bool is_platform_dump = + func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP]; + const bool is_config_conn = + func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR]; + u32 base, size, end; - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - struct rtas_filter *f = &rtas_filters[i]; - u32 base, size, end; - - if (token != f->token) - continue; - - if (f->buf_idx1 != -1) { - base = be32_to_cpu(args->args[f->buf_idx1]); - if (f->size_idx1 != -1) - size = be32_to_cpu(args->args[f->size_idx1]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - - end = base + size - 1; + /* + * Only functions with filters attached are allowed. + */ + f = func->filter; + if (!f) + goto err; + /* + * And some functions aren't allowed on LE. + */ + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le) + goto err; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; - /* - * Special case for ibm,platform-dump - NULL buffer - * address is used to indicate end of dump processing - */ - if (!strcmp(f->name, "ibm,platform-dump") && - base == 0) - return false; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (is_platform_dump && base == 0) + return false; - if (f->buf_idx2 != -1) { - base = be32_to_cpu(args->args[f->buf_idx2]); - if (f->size_idx2 != -1) - size = be32_to_cpu(args->args[f->size_idx2]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } - /* - * Special case for ibm,configure-connector where the - * address can be 0 - */ - if (!strcmp(f->name, "ibm,configure-connector") && - base == 0) - return false; + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (is_config_conn && base == 0) + return false; - return false; + if (!in_rmo_buf(base, end)) + goto err; } + return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); - pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", - token, nargs, current->comm); + pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n", + func->name, nargs, current->comm); return true; } -static void __init rtas_syscall_filter_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) - rtas_filters[i].token = rtas_token(rtas_filters[i].name); -} - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { + const struct rtas_function *func; + struct pin_cookie cookie; struct rtas_args args; unsigned long flags; char *buff_copy, *errbuf = NULL; @@ -1224,21 +1874,29 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; + nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args)); + nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs); + /* Copy in args. */ if (copy_from_user(args.args, uargs->args, nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; - if (token == RTAS_UNKNOWN_SERVICE) + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. + */ + func = rtas_token_to_function_untrusted(token); + if (!func) return -EINVAL; args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); - if (block_rtas_call(token, nargs, &args)) + if (block_rtas_call(func, nargs, &args)) return -EINVAL; - if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + if (token_is_restricted_errinjct(token)) { int err; err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION); @@ -1247,7 +1905,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) } /* Need to handle ibm,suspend_me call specially */ - if (token == rtas_token("ibm,suspend-me")) { + if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1268,18 +1926,32 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); - flags = lock_rtas(); + /* + * If this function has a mutex assigned to it, we must + * acquire it to avoid interleaving with any kernel-based uses + * of the same function. Kernel-based sequences acquire the + * appropriate mutex explicitly. + */ + if (func->lock) + mutex_lock(func->lock); + + raw_spin_lock_irqsave(&rtas_lock, flags); + cookie = lockdep_pin_lock(&rtas_lock); - rtas.args = args; - do_enter_rtas(__pa(&rtas.args)); - args = rtas.args; + rtas_args = args; + do_enter_rtas(&rtas_args); + args = rtas_args; /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ if (be32_to_cpu(args.rets[0]) == -1) errbuf = __fetch_rtas_last_error(buff_copy); - unlock_rtas(flags); + lockdep_unpin_lock(&rtas_lock, cookie); + raw_spin_unlock_irqrestore(&rtas_lock, flags); + + if (func->lock) + mutex_unlock(func->lock); if (buff_copy) { if (errbuf) @@ -1297,6 +1969,54 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return 0; } +static void __init rtas_function_table_init(void) +{ + struct property *prop; + + for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { + struct rtas_function *curr = &rtas_function_table[i]; + struct rtas_function *prior; + int cmp; + + curr->token = RTAS_UNKNOWN_SERVICE; + + if (i == 0) + continue; + /* + * Ensure table is sorted correctly for binary search + * on function names. + */ + prior = &rtas_function_table[i - 1]; + + cmp = strcmp(prior->name, curr->name); + if (cmp < 0) + continue; + + if (cmp == 0) { + pr_err("'%s' has duplicate function table entries\n", + curr->name); + } else { + pr_err("function table unsorted: '%s' wrongly precedes '%s'\n", + prior->name, curr->name); + } + } + + for_each_property_of_node(rtas.dev, prop) { + struct rtas_function *func; + + if (prop->length != sizeof(u32)) + continue; + + func = __rtas_name_to_function(prop->name); + if (!func) + continue; + + func->token = be32_to_cpup((__be32 *)prop->value); + + pr_debug("function %s has token %u\n", func->name, func->token); + } +} + /* * Call early during boot, before mem init, to retrieve the RTAS * information from the device-tree and allocate the RMO buffer for userland @@ -1330,12 +2050,14 @@ void __init rtas_initialize(void) init_error_log_max(); + /* Must be called before any function token lookups */ + rtas_function_table_init(); + /* - * Discover these now to avoid device tree lookups in the + * Discover this now to avoid a device tree lookup in the * panic path. */ - if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) - ibm_os_term_token = rtas_token("ibm,os-term"); + ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term"); /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any @@ -1350,12 +2072,7 @@ void __init rtas_initialize(void) panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", PAGE_SIZE, &rtas_region); -#ifdef CONFIG_RTAS_ERROR_LOGGING - rtas_last_error_token = rtas_token("rtas-last-error"); -#endif - ibm_open_errinjct_token = rtas_token("ibm,open-errinjct"); - ibm_errinjct_token = rtas_token("ibm,errinjct"); - rtas_syscall_filter_init(); + rtas_work_area_reserve_arena(rtas_region); } int __init early_init_dt_scan_rtas(unsigned long node, @@ -1382,42 +2099,26 @@ int __init early_init_dt_scan_rtas(unsigned long node, rtas.size = *sizep; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - basep = of_get_flat_dt_prop(node, "put-term-char", NULL); - if (basep) - rtas_putchar_token = *basep; - - basep = of_get_flat_dt_prop(node, "get-term-char", NULL); - if (basep) - rtas_getchar_token = *basep; - - if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE && - rtas_getchar_token != RTAS_UNKNOWN_SERVICE) - udbg_init_rtas_console(); - -#endif - /* break now */ return 1; } -static arch_spinlock_t timebase_lock; +static DEFINE_RAW_SPINLOCK(timebase_lock); static u64 timebase = 0; void rtas_give_timebase(void) { unsigned long flags; - local_irq_save(flags); + raw_spin_lock_irqsave(&timebase_lock, flags); hard_irq_disable(); - arch_spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL); timebase = get_tb(); - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); while (timebase) barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL); local_irq_restore(flags); } @@ -1425,8 +2126,8 @@ void rtas_take_timebase(void) { while (!timebase) barrier(); - arch_spin_lock(&timebase_lock); + raw_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index bc817a5619d6..583dc16e9d3c 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -312,13 +312,13 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, { struct rtas_update_flash_t *const uf = &rtas_update_flash_data; char *p; - int next_free, rc; + int next_free; struct flash_block_list *fl; - mutex_lock(&rtas_update_flash_mutex); + guard(mutex)(&rtas_update_flash_mutex); if (uf->status == FLASH_AUTH || count == 0) - goto out; /* discard data */ + return count; /* discard data */ /* In the case that the image is not ready for flashing, the memory * allocated for the block list will be freed upon the release of the @@ -327,7 +327,7 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, if (uf->flist == NULL) { uf->flist = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL); if (!uf->flist) - goto nomem; + return -ENOMEM; } fl = uf->flist; @@ -338,7 +338,7 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, /* Need to allocate another block_list */ fl->next = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL); if (!fl->next) - goto nomem; + return -ENOMEM; fl = fl->next; next_free = 0; } @@ -347,25 +347,17 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, count = RTAS_BLK_SIZE; p = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL); if (!p) - goto nomem; + return -ENOMEM; if(copy_from_user(p, buffer, count)) { kmem_cache_free(flash_block_cache, p); - rc = -EFAULT; - goto error; + return -EFAULT; } fl->blocks[next_free].data = p; fl->blocks[next_free].length = count; fl->num_blocks++; -out: - mutex_unlock(&rtas_update_flash_mutex); - return count; -nomem: - rc = -ENOMEM; -error: - mutex_unlock(&rtas_update_flash_mutex); - return rc; + return count; } /* @@ -376,7 +368,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op) s32 rc; do { - rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, 1, + rc = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE), 1, 1, NULL, op); } while (rtas_busy_delay(rc)); @@ -405,19 +397,18 @@ static ssize_t manage_flash_write(struct file *file, const char __user *buf, static const char reject_str[] = "0"; static const char commit_str[] = "1"; char stkbuf[10]; - int op, rc; + int op; - mutex_lock(&rtas_manage_flash_mutex); + guard(mutex)(&rtas_manage_flash_mutex); if ((args_buf->status == MANAGE_AUTH) || (count == 0)) - goto out; + return count; op = -1; if (buf) { if (count > 9) count = 9; - rc = -EFAULT; if (copy_from_user (stkbuf, buf, count)) - goto error; + return -EFAULT; if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0) op = RTAS_REJECT_TMP_IMG; else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0) @@ -425,18 +416,11 @@ static ssize_t manage_flash_write(struct file *file, const char __user *buf, } if (op == -1) { /* buf is empty, or contains invalid string */ - rc = -EINVAL; - goto error; + return -EINVAL; } manage_flash(args_buf, op); -out: - mutex_unlock(&rtas_manage_flash_mutex); return count; - -error: - mutex_unlock(&rtas_manage_flash_mutex); - return rc; } /* @@ -444,7 +428,7 @@ error: */ static void validate_flash(struct rtas_validate_flash_t *args_buf) { - int token = rtas_token("ibm,validate-flash-image"); + int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE); int update_results; s32 rc; @@ -499,16 +483,14 @@ static ssize_t validate_flash_write(struct file *file, const char __user *buf, { struct rtas_validate_flash_t *const args_buf = &rtas_validate_flash_data; - int rc; - mutex_lock(&rtas_validate_flash_mutex); + guard(mutex)(&rtas_validate_flash_mutex); /* We are only interested in the first 4K of the * candidate image */ if ((*off >= VALIDATE_BUF_SIZE) || (args_buf->status == VALIDATE_AUTH)) { *off += count; - mutex_unlock(&rtas_validate_flash_mutex); return count; } @@ -519,20 +501,14 @@ static ssize_t validate_flash_write(struct file *file, const char __user *buf, args_buf->status = VALIDATE_INCOMPLETE; } - if (!access_ok(buf, count)) { - rc = -EFAULT; - goto done; - } - if (copy_from_user(args_buf->buf + *off, buf, count)) { - rc = -EFAULT; - goto done; - } + if (!access_ok(buf, count)) + return -EFAULT; + + if (copy_from_user(args_buf->buf + *off, buf, count)) + return -EFAULT; *off += count; - rc = count; -done: - mutex_unlock(&rtas_validate_flash_mutex); - return rc; + return count; } static int validate_flash_release(struct inode *inode, struct file *file) @@ -570,7 +546,7 @@ static void rtas_flash_firmware(int reboot_type) return; } - update_token = rtas_token("ibm,update-flash-64-and-reboot"); + update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT); if (update_token == RTAS_UNKNOWN_SERVICE) { printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot " "is not available -- not a service partition?\n"); @@ -653,7 +629,7 @@ static void rtas_flash_firmware(int reboot_type) */ struct rtas_flash_file { const char *filename; - const char *rtas_call_name; + const rtas_fn_handle_t handle; int *status; const struct proc_ops ops; }; @@ -661,7 +637,7 @@ struct rtas_flash_file { static const struct rtas_flash_file rtas_flash_files[] = { { .filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_msg, .ops.proc_write = rtas_flash_write, @@ -670,7 +646,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_num, .ops.proc_write = rtas_flash_write, @@ -679,7 +655,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" VALIDATE_FLASH_NAME, - .rtas_call_name = "ibm,validate-flash-image", + .handle = RTAS_FN_IBM_VALIDATE_FLASH_IMAGE, .status = &rtas_validate_flash_data.status, .ops.proc_read = validate_flash_read, .ops.proc_write = validate_flash_write, @@ -688,7 +664,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" MANAGE_FLASH_NAME, - .rtas_call_name = "ibm,manage-flash-image", + .handle = RTAS_FN_IBM_MANAGE_FLASH_IMAGE, .status = &rtas_manage_flash_data.status, .ops.proc_read = manage_flash_read, .ops.proc_write = manage_flash_write, @@ -700,8 +676,7 @@ static int __init rtas_flash_init(void) { int i; - if (rtas_token("ibm,update-flash-64-and-reboot") == - RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) { pr_info("rtas_flash: no firmware flash support\n"); return -EINVAL; } @@ -710,9 +685,9 @@ static int __init rtas_flash_init(void) if (!rtas_validate_flash_data.buf) return -ENOMEM; - flash_block_cache = kmem_cache_create("rtas_flash_cache", - RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0, - NULL); + flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache", + RTAS_BLK_SIZE, RTAS_BLK_SIZE, + 0, 0, RTAS_BLK_SIZE, NULL); if (!flash_block_cache) { printk(KERN_ERR "%s: failed to create block cache\n", __func__); @@ -730,7 +705,7 @@ static int __init rtas_flash_init(void) * This code assumes that the status int is the first member of the * struct */ - token = rtas_token(f->rtas_call_name); + token = rtas_function_token(f->handle); if (token == RTAS_UNKNOWN_SERVICE) *f->status = FLASH_AUTH; else @@ -774,4 +749,5 @@ static void __exit rtas_flash_cleanup(void) module_init(rtas_flash_init); module_exit(rtas_flash_cleanup); +MODULE_DESCRIPTION("PPC procfs firmware flash interface"); MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 5a2f5ea3b054..fccf96e897f6 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -43,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where) return 0; } -int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { int returnval = -1; unsigned long buid, addr; @@ -87,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, pdn = pci_get_pdn_by_devfn(bus, devfn); /* Validity of pdn is checked in here */ - ret = rtas_read_config(pdn, where, size, val); + ret = rtas_pci_dn_read_config(pdn, where, size, val); if (*val == EEH_IO_ERROR_VALUE(size) && eeh_dev_check_failure(pdn_to_eeh_dev(pdn))) return PCIBIOS_DEVICE_NOT_FOUND; @@ -95,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, return ret; } -int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) +int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val) { unsigned long buid, addr; int ret; @@ -134,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, pdn = pci_get_pdn_by_devfn(bus, devfn); /* Validity of pdn is checked in here. */ - return rtas_write_config(pdn, where, size, val); + return rtas_pci_dn_write_config(pdn, where, size, val); } static struct pci_ops rtas_pci_ops = { @@ -191,10 +191,10 @@ static void python_countermeasures(struct device_node *dev) void __init init_pci_config_tokens(void) { - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); + read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG); + write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG); + ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG); + ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG); } unsigned long get_phb_buid(struct device_node *phb) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index cc56ac6ba4b0..6336ec9aedd0 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -89,6 +89,8 @@ static char *rtas_event_type(int type) return "Platform Resource Reassignment Event"; case RTAS_TYPE_HOTPLUG: return "Hotplug Event"; + case RTAS_TYPE_HVPIPE: + return "Hypervisor Pipe Notification event"; } return rtas_type[0]; @@ -506,7 +508,7 @@ static int __init rtas_event_scan_init(void) return 0; /* No RTAS */ - event_scan = rtas_token("event-scan"); + event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN); if (event_scan == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "rtasd: No event-scan on system\n"); return -ENODEV; diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index f9af305d9579..3a28795b4ed8 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -5,6 +5,7 @@ */ #include <linux/types.h> #include <linux/of.h> +#include <linux/string_choices.h> #include <asm/secure_boot.h> static struct device_node *get_ppc_fw_sb_node(void) @@ -32,11 +33,13 @@ bool is_ppc_secureboot_enabled(void) if (enabled) goto out; - if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot)) + node = of_find_node_by_path("/"); + if (!of_property_read_u32(node, "ibm,secure-boot", &secureboot)) enabled = (secureboot > 1); + of_node_put(node); out: - pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } @@ -54,11 +57,13 @@ bool is_ppc_trustedboot_enabled(void) if (enabled) goto out; - if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot)) + node = of_find_node_by_path("/"); + if (!of_property_read_u32(node, "ibm,trusted-boot", &trustedboot)) enabled = (trustedboot > 0); + of_node_put(node); out: - pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 206475e3e0b4..fbb7ebd8aa08 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,7 +14,7 @@ #include <linux/debugfs.h> #include <asm/asm-prototypes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/security_features.h> #include <asm/sections.h> #include <asm/setup.h> @@ -364,26 +364,27 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * static int ssb_prctl_get(struct task_struct *task) { + /* + * The STF_BARRIER feature is on by default, so if it's off that means + * firmware has explicitly said the CPU is not vulnerable via either + * the hypercall or device tree. + */ + if (!security_ftr_enabled(SEC_FTR_STF_BARRIER)) + return PR_SPEC_NOT_AFFECTED; + + /* + * If the system's CPU has no known barrier (see setup_stf_barrier()) + * then assume that the CPU is not vulnerable. + */ if (stf_enabled_flush_types == STF_BARRIER_NONE) - /* - * We don't have an explicit signal from firmware that we're - * vulnerable or not, we only have certain CPU revisions that - * are known to be vulnerable. - * - * We assume that if we're on another CPU, where the barrier is - * NONE, then we are not vulnerable. - */ return PR_SPEC_NOT_AFFECTED; - else - /* - * If we do have a barrier type then we are vulnerable. The - * barrier is not a global or per-process mitigation, so the - * only value we can report here is PR_SPEC_ENABLE, which - * appears as "vulnerable" in /proc. - */ - return PR_SPEC_ENABLE; - - return -EINVAL; + + /* + * Otherwise the CPU is vulnerable. The barrier is not a global or + * per-process mitigation, so the only value that can be reported here + * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc. + */ + return PR_SPEC_ENABLE; } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c index 6a29777d6a2d..19172a2804f0 100644 --- a/arch/powerpc/kernel/secvar-ops.c +++ b/arch/powerpc/kernel/secvar-ops.c @@ -8,10 +8,16 @@ #include <linux/cache.h> #include <asm/secvar.h> +#include <asm/bug.h> -const struct secvar_operations *secvar_ops __ro_after_init; +const struct secvar_operations *secvar_ops __ro_after_init = NULL; -void set_secvar_ops(const struct secvar_operations *ops) +int set_secvar_ops(const struct secvar_operations *ops) { + if (WARN_ON_ONCE(secvar_ops)) + return -EBUSY; + secvar_ops = ops; + + return 0; } diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 1ee4640a2641..ec900bce0257 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -21,56 +21,48 @@ static struct kset *secvar_kset; static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - ssize_t rc = 0; - struct device_node *node; - const char *format; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } - - rc = of_property_read_string(node, "format", &format); - if (rc) - goto out; - - rc = sprintf(buf, "%s\n", format); + char tmp[32]; + ssize_t len = secvar_ops->format(tmp, sizeof(tmp)); -out: - of_node_put(node); + if (len > 0) + return sysfs_emit(buf, "%s\n", tmp); + else if (len < 0) + pr_err("Error %zd reading format string\n", len); + else + pr_err("Got empty format string from backend\n"); - return rc; + return -EIO; } static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - uint64_t dsize; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error retrieving %s variable size %d\n", kobj->name, - rc); + if (rc != -ENOENT) + pr_err("Error retrieving %s variable size %d\n", kobj->name, rc); return rc; } - return sprintf(buf, "%llu\n", dsize); + return sysfs_emit(buf, "%llu\n", dsize); } static ssize_t data_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - uint64_t dsize; char *data; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error getting %s variable size %d\n", kobj->name, rc); + if (rc != -ENOENT) + pr_err("Error getting %s variable size %d\n", kobj->name, rc); return rc; } pr_debug("dsize is %llu\n", dsize); @@ -93,7 +85,7 @@ data_fail: } static ssize_t update_write(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int rc; @@ -112,11 +104,11 @@ static struct kobj_attribute format_attr = __ATTR_RO(format); static struct kobj_attribute size_attr = __ATTR_RO(size); -static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); +static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0); -static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); +static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0); -static struct bin_attribute *secvar_bin_attrs[] = { +static const struct bin_attribute *const secvar_bin_attrs[] = { &data_attr, &update_attr, NULL, @@ -133,44 +125,68 @@ static const struct attribute_group secvar_attr_group = { }; __ATTRIBUTE_GROUPS(secvar_attr); -static struct kobj_type secvar_ktype = { +static const struct kobj_type secvar_ktype = { .sysfs_ops = &kobj_sysfs_ops, .default_groups = secvar_attr_groups, }; -static int update_kobj_size(void) +static __init int update_kobj_size(void) { - struct device_node *node; u64 varsize; - int rc = 0; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } + int rc = secvar_ops->max_size(&varsize); - rc = of_property_read_u64(node, "max-var-size", &varsize); if (rc) - goto out; + return rc; data_attr.size = varsize; update_attr.size = varsize; -out: - of_node_put(node); + return 0; +} - return rc; +static __init int secvar_sysfs_config(struct kobject *kobj) +{ + struct attribute_group config_group = { + .name = "config", + .attrs = (struct attribute **)secvar_ops->config_attrs, + }; + + if (secvar_ops->config_attrs) + return sysfs_create_group(kobj, &config_group); + + return 0; } -static int secvar_sysfs_load(void) +static __init int add_var(const char *name) { - char *name; - uint64_t namesize = 0; struct kobject *kobj; int rc; + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + return rc; + } + + kobject_uevent(kobj, KOBJ_ADD); + return 0; +} + +static __init int secvar_sysfs_load(void) +{ + u64 namesize = 0; + char *name; + int rc; + name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); if (!name) return -ENOMEM; @@ -179,73 +195,99 @@ static int secvar_sysfs_load(void) rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); if (rc) { if (rc != -ENOENT) - pr_err("error getting secvar from firmware %d\n", - rc); - break; - } + pr_err("error getting secvar from firmware %d\n", rc); + else + rc = 0; - kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); - if (!kobj) { - rc = -ENOMEM; break; } - kobject_init(kobj, &secvar_ktype); - - rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); - if (rc) { - pr_warn("kobject_add error %d for attribute: %s\n", rc, - name); - kobject_put(kobj); - kobj = NULL; - } - - if (kobj) - kobject_uevent(kobj, KOBJ_ADD); - + rc = add_var(name); } while (!rc); kfree(name); return rc; } -static int secvar_sysfs_init(void) +static __init int secvar_sysfs_load_static(void) +{ + const char * const *name_ptr = secvar_ops->var_names; + int rc; + + while (*name_ptr) { + rc = add_var(*name_ptr); + if (rc) + return rc; + name_ptr++; + } + + return 0; +} + +static __init int secvar_sysfs_init(void) { + u64 max_size; int rc; if (!secvar_ops) { - pr_warn("secvar: failed to retrieve secvar operations.\n"); + pr_warn("Failed to retrieve secvar operations\n"); return -ENODEV; } secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); if (!secvar_kobj) { - pr_err("secvar: Failed to create firmware kobj\n"); + pr_err("Failed to create firmware kobj\n"); return -ENOMEM; } rc = sysfs_create_file(secvar_kobj, &format_attr.attr); if (rc) { - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("Failed to create format object\n"); + rc = -ENOMEM; + goto err; } secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); if (!secvar_kset) { - pr_err("secvar: sysfs kobject registration failed.\n"); - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("sysfs kobject registration failed\n"); + rc = -ENOMEM; + goto err; } rc = update_kobj_size(); if (rc) { pr_err("Cannot read the size of the attribute\n"); - return rc; + goto err; + } + + rc = secvar_sysfs_config(secvar_kobj); + if (rc) { + pr_err("Failed to create config directory\n"); + goto err; } - secvar_sysfs_load(); + if (secvar_ops->get_next) + rc = secvar_sysfs_load(); + else + rc = secvar_sysfs_load_static(); + + if (rc) { + pr_err("Failed to create variable attributes\n"); + goto err; + } + + // Due to sysfs limitations, we will only ever get a write buffer of + // up to 1 page in size. Print a warning if this is potentially going + // to cause problems, so that the user is aware. + secvar_ops->max_size(&max_size); + if (max_size > PAGE_SIZE) + pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n", + PAGE_SIZE, max_size); return 0; +err: + kobject_put(secvar_kobj); + return rc; } late_initcall(secvar_sysfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9b10e57040c6..c8c42b419742 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -22,7 +22,6 @@ #include <linux/seq_file.h> #include <linux/ioport.h> #include <linux/console.h> -#include <linux/screen_info.h> #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/unistd.h> @@ -31,11 +30,12 @@ #include <linux/serial_8250.h> #include <linux/percpu.h> #include <linux/memblock.h> -#include <linux/of_irq.h> +#include <linux/of.h> #include <linux/of_fdt.h> -#include <linux/of_platform.h> +#include <linux/of_irq.h> #include <linux/hugetlb.h> #include <linux/pgtable.h> +#include <asm/kexec.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/processor.h> @@ -68,6 +68,7 @@ #include <asm/cpu_has_feature.h> #include <asm/kasan.h> #include <asm/mce.h> +#include <asm/systemcfg.h> #include "setup.h" @@ -86,6 +87,11 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +int __initdata boot_core_hwid = -1; + +#ifdef CONFIG_PPC64 +int boot_cpu_hwid = -1; +#endif /* * These are used in binfmt_elf.c to put aux entries on the stack @@ -94,21 +100,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); int dcache_bsize; int icache_bsize; -/* - * This still seems to be needed... -- paulus - */ -struct screen_info screen_info = { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = 1, - .orig_video_points = 16 -}; -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; EXPORT_SYMBOL_GPL(of_i8042_kbd_irq); @@ -121,7 +112,7 @@ int ppc_do_canonicalize_irqs; EXPORT_SYMBOL(ppc_do_canonicalize_irqs); #endif -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_CRASH_DUMP /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; #endif @@ -416,13 +407,32 @@ static void __init cpu_init_thread_core_maps(int tpc) cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", - tpc, tpc > 1 ? "s" : ""); + tpc, str_plural(tpc)); printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); } u32 *cpu_to_phys_id = NULL; +static int assign_threads(unsigned int cpu, unsigned int nthreads, bool present, + const __be32 *hw_ids) +{ + for (int i = 0; i < nthreads && cpu < nr_cpu_ids; i++) { + __be32 hwid; + + hwid = be32_to_cpu(hw_ids[i]); + + DBG(" thread %d -> cpu %d (hard id %d)\n", i, cpu, hwid); + + set_cpu_present(cpu, present); + set_cpu_possible(cpu, true); + cpu_to_phys_id[cpu] = hwid; + cpu++; + } + + return cpu; +} + /** * setup_cpu_maps - initialize the following cpu maps: * cpu_possible_mask @@ -449,16 +459,13 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32), + cpu_to_phys_id = memblock_alloc_or_panic(nr_cpu_ids * sizeof(u32), __alignof__(u32)); - if (!cpu_to_phys_id) - panic("%s: Failed to allocate %zu bytes align=0x%zx\n", - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32)); for_each_node_by_type(dn, "cpu") { const __be32 *intserv; __be32 cpu_be; - int j, len; + int len; DBG(" * %pOF...\n", dn); @@ -480,27 +487,31 @@ void __init smp_setup_cpu_maps(void) nthreads = len / sizeof(int); - for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { - bool avail; + bool avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); - DBG(" thread %d -> cpu %d (hard id %d)\n", - j, cpu, be32_to_cpu(intserv[j])); - - avail = of_device_is_available(dn); - if (!avail) - avail = !of_property_match_string(dn, - "enable-method", "spin-table"); - - set_cpu_present(cpu, avail); - set_cpu_possible(cpu, true); - cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]); - cpu++; - } + if (boot_core_hwid >= 0) { + if (cpu == 0) { + pr_info("Skipping CPU node %pOF to allow for boot core.\n", dn); + cpu = nthreads; + continue; + } - if (cpu >= nr_cpu_ids) { + if (be32_to_cpu(intserv[0]) == boot_core_hwid) { + pr_info("Renumbered boot core %pOF to logical 0\n", dn); + assign_threads(0, nthreads, avail, intserv); + of_node_put(dn); + break; + } + } else if (cpu >= nr_cpu_ids) { of_node_put(dn); break; } + + if (cpu < nr_cpu_ids) + cpu = assign_threads(cpu, nthreads, avail, intserv); } /* If no SMT supported, nthreads is forced to 1 */ @@ -548,7 +559,9 @@ void __init smp_setup_cpu_maps(void) out: of_node_put(dn); } - vdso_data->processorCount = num_present_cpus(); +#endif +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ /* Initialize CPU <=> thread mapping/ @@ -597,7 +610,6 @@ struct seq_buf ppc_hw_desc __initdata = { .buffer = ppc_hw_desc_buf, .size = sizeof(ppc_hw_desc_buf), .len = 0, - .readpos = 0, }; static __init void probe_machine(void) @@ -626,13 +638,16 @@ static __init void probe_machine(void) for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { - DBG(" %s ...", machine_id->name); + DBG(" %s ...\n", machine_id->name); + if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible)) + continue; + if (machine_id->compatibles && !of_machine_compatible_match(machine_id->compatibles)) + continue; memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); - if (ppc_md.probe()) { - DBG(" match !\n"); - break; - } - DBG("\n"); + if (ppc_md.probe && !ppc_md.probe()) + continue; + DBG(" %s match !\n", machine_id->name); + break; } /* What can we do if we didn't find ? */ if (machine_id >= &__machine_desc_end) { @@ -817,8 +832,8 @@ static int __init check_cache_coherency(void) if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR "kernel coherency:%s != device tree_coherency:%s\n", - KERNEL_COHERENCY ? "on" : "off", - devtree_coherency ? "on" : "off"); + str_on_off(KERNEL_COHERENCY), + str_on_off(devtree_coherency)); BUG(); } @@ -943,6 +958,7 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that @@ -964,8 +980,12 @@ void __init setup_arch(char **cmdline_p) klp_init_thread_info(&init_task); setup_initial_init_mm(_stext, _etext, _edata, _end); - + /* sched_init() does the mmgrab(&init_mm) for the primary CPU */ + VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm))); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm)); + inc_mm_active_cpus(&init_mm); mm_iommu_init(&init_mm); + irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); @@ -976,9 +996,12 @@ void __init setup_arch(char **cmdline_p) initmem_init(); /* - * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must - * be called after initmem_init(), so that pageblock_order is initialised. + * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM and + * hugetlb. These must be called after initmem_init(), so that + * pageblock_order is initialised. */ + fadump_cma_init(); + kdump_cma_reserve(); kvm_cma_reserve(); gigantic_hugetlb_cma_reserve(); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 7912bb50a7cb..385a00a2e2ca 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -29,7 +29,7 @@ void setup_tlb_core_data(void); static inline void setup_tlb_core_data(void) { } #endif -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void exc_lvl_early_init(void); #else static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b761cc1a403c..5a1bf501fbe1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,7 +40,7 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/kdump.h> @@ -140,13 +140,7 @@ arch_initcall(ppc_init); static void *__init alloc_stack(void) { - void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN); - - if (!ptr) - panic("cannot allocate %d bytes for stack at %pS\n", - THREAD_SIZE, (void *)_RET_IP_); - - return ptr; + return memblock_alloc_or_panic(THREAD_SIZE, THREAD_ALIGN); } void __init irqstack_early_init(void) @@ -176,7 +170,7 @@ void __init emergency_stack_init(void) } #endif -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a0dee7354fe6..8fd7cbf3bd04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -60,7 +60,7 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/opal.h> #include <asm/cputhreads.h> @@ -141,10 +141,7 @@ void __init check_smt_enabled(void) smt_enabled_at_boot = 0; else { int smt; - int rc; - - rc = kstrtoint(smt_enabled_cmdline, 10, &smt); - if (!rc) + if (!kstrtoint(smt_enabled_cmdline, 10, &smt)) smt_enabled_at_boot = min(threads_per_core, smt); } @@ -364,7 +361,7 @@ void __init early_setup(unsigned long dt_ptr) */ initialise_paca(&boot_paca, 0); fixup_boot_paca(&boot_paca); - WARN_ON(local_paca != 0); + WARN_ON(local_paca); setup_paca(&boot_paca); /* install the paca into registers */ /* -------- printk is now safe to use ------- */ @@ -385,17 +382,21 @@ void __init early_setup(unsigned long dt_ptr) /* * Do early initialization using the flattened device * tree, such as retrieving the physical memory map or - * calculating/retrieving the hash table size. + * calculating/retrieving the hash table size, discover + * boot_cpuid and boot_cpu_hwid. */ early_init_devtree(__va(dt_ptr)); - /* Now we know the logical id of our boot cpu, setup the paca. */ - if (boot_cpuid != 0) { - /* Poison paca_ptrs[0] again if it's not the boot cpu */ - memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); - } + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); fixup_boot_paca(paca_ptrs[boot_cpuid]); setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ + // smp_processor_id() now reports boot_cpuid + +#ifdef CONFIG_SMP + task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current) +#endif /* * Configure exception handlers. This include setting up trampolines @@ -476,7 +477,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -void panic_smp_self_stop(void) +void __noreturn panic_smp_self_stop(void) { hard_irq_disable(); spin_begin(); @@ -692,11 +693,7 @@ __init u64 ppc64_bolted_size(void) { #ifdef CONFIG_PPC_BOOK3E_64 /* Freescale BookE bolts the entire linear mapping */ - /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ - if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - return linear_map_top; - /* Other BookE, we assume the first GB is bolted */ - return 1ul << 30; + return linear_map_top; #else /* BookS radix, does not take faults on linear mapping */ if (early_radix_enabled()) @@ -830,6 +827,7 @@ static __init int pcpu_cpu_to_node(int cpu) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); void __init setup_per_cpu_areas(void) { @@ -872,6 +870,7 @@ void __init setup_per_cpu_areas(void) if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); + static_key_enable(&__percpu_first_chunk_is_paged.key); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; @@ -890,7 +889,7 @@ unsigned long memory_block_size_bytes(void) } #endif -#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) +#ifdef CONFIG_PPC_INDIRECT_PIO struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif @@ -918,6 +917,7 @@ static int __init disable_hardlockup_detector(void) hardlockup_detector_disable(); #else if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); if (is_kvm_guest()) hardlockup_detector_disable(); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 68a91e553e14..aa17e62f3754 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Common signal handling code for both 32 and 64 bits * * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/resume_user_mode.h> diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index a429c57ed433..58ecea1cdc27 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -1,10 +1,7 @@ -/* +/* SPDX-License-Identifier: GPL-2.0-or-later + * * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #ifndef _POWERPC_ARCH_SIGNAL_H diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index c114c7f25645..7a718ed32b27 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -264,8 +264,9 @@ static void prepare_save_user_regs(int ctx_has_vsx_region) #endif } -static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int ctx_has_vsx_region) +static __always_inline int +__unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -364,8 +365,9 @@ static void prepare_save_tm_user_regs(void) current->thread.ckvrsave = mfspr(SPRN_VRSAVE); } -static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static __always_inline int +save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { /* Save both sets of general registers */ unsafe_save_general_regs(¤t->thread.ckpt_regs, frame, failed); @@ -444,8 +446,9 @@ failed: #else static void prepare_save_tm_user_regs(void) { } -static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static __always_inline int +save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { return 0; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6b90f10a6c81..292fee8809bc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -47,6 +47,7 @@ #include <asm/smp.h> #include <asm/time.h> #include <asm/machdep.h> +#include <asm/mmu_context.h> #include <asm/cputhreads.h> #include <asm/cputable.h> #include <asm/mpic.h> @@ -60,6 +61,9 @@ #include <asm/ftrace.h> #include <asm/kup.h> #include <asm/fadump.h> +#include <asm/systemcfg.h> + +#include <trace/events/ipi.h> #ifdef DEBUG #include <asm/udbg.h> @@ -74,10 +78,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; #endif struct task_struct *secondary_current; -bool has_big_cores; -bool coregroup_enabled; -bool thread_group_shares_l2; -bool thread_group_shares_l3; +bool has_big_cores __ro_after_init; +bool coregroup_enabled __ro_after_init; +bool thread_group_shares_l2 __ro_after_init; +bool thread_group_shares_l3 __ro_after_init; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -90,15 +94,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); -enum { -#ifdef CONFIG_SCHED_SMT - smt_idx, -#endif - cache_idx, - mc_idx, - die_idx, -}; - #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 #define THREAD_GROUP_SHARE_L2_L3 2 @@ -289,7 +284,7 @@ void smp_muxed_ipi_set_message(int cpu, int msg) * Order previous accesses before accesses in the IPI handler. */ smp_mb(); - message[msg] = 1; + WRITE_ONCE(message[msg], 1); } void smp_muxed_ipi_message_pass(int cpu, int msg) @@ -348,7 +343,7 @@ irqreturn_t smp_ipi_demux_relaxed(void) if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) nmi_ipi_action(0, NULL); #endif - } while (info->messages); + } while (READ_ONCE(info->messages)); return IRQ_HANDLED; } @@ -364,12 +359,12 @@ static inline void do_message_pass(int cpu, int msg) #endif } -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu) { if (likely(smp_ops)) do_message_pass(cpu, PPC_MSG_RESCHEDULE); } -EXPORT_SYMBOL_GPL(smp_send_reschedule); +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { @@ -415,9 +410,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) { raw_local_irq_save(*flags); hard_irq_disable(); - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -425,15 +420,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) noinstr static void nmi_ipi_lock(void) { - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); } noinstr static void nmi_ipi_unlock(void) { smp_mb(); - WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1); - arch_atomic_set(&__nmi_ipi_lock, 0); + WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1); + raw_atomic_set(&__nmi_ipi_lock, 0); } noinstr static void nmi_ipi_unlock_end(unsigned long *flags) @@ -594,7 +589,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { int cpu; @@ -637,7 +632,7 @@ void crash_smp_send_stop(void) stopped = true; -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP if (kexec_crash_image) { crash_kexec_prepare(); return; @@ -984,13 +979,13 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return 0; } -static bool shared_caches; +static bool shared_caches __ro_after_init; #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymmetric SMT dependency */ static int powerpc_smt_flags(void) { - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); @@ -1001,6 +996,13 @@ static int powerpc_smt_flags(void) #endif /* + * On shared processor LPARs scheduled on a big core (which has two or more + * independent thread groups per core), prefer lower numbered CPUs, so + * that workload consolidates to lesser number of cores. + */ +static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack); + +/* * P9 has a slightly odd architecture where pairs of cores share an L2 cache. * This topology makes it *much* cheaper to migrate tasks between adjacent cores * since the migrated task remains cache hot. We want to take advantage of this @@ -1008,50 +1010,50 @@ static int powerpc_smt_flags(void) */ static int powerpc_shared_cache_flags(void) { - return SD_SHARE_PKG_RESOURCES; + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_SHARE_LLC | SD_ASYM_PACKING; + + return SD_SHARE_LLC; +} + +static int powerpc_shared_proc_flags(void) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_ASYM_PACKING; + + return 0; } /* * We can't just pass cpu_l2_cache_mask() directly because * returns a non-const pointer and the compiler barfs on that. */ -static const struct cpumask *shared_cache_mask(int cpu) +static const struct cpumask *tl_cache_mask(struct sched_domain_topology_level *tl, int cpu) { return per_cpu(cpu_l2_cache_map, cpu); } #ifdef CONFIG_SCHED_SMT -static const struct cpumask *smallcore_smt_mask(int cpu) +static const struct cpumask *tl_smallcore_smt_mask(struct sched_domain_topology_level *tl, int cpu) { return cpu_smallcore_mask(cpu); } #endif -static struct cpumask *cpu_coregroup_mask(int cpu) +struct cpumask *cpu_coregroup_mask(int cpu) { return per_cpu(cpu_coregroup_map, cpu); } static bool has_coregroup_support(void) { - return coregroup_enabled; -} + /* Coregroup identification not available on shared systems */ + if (is_shared_processor()) + return 0; -static const struct cpumask *cpu_mc_mask(int cpu) -{ - return cpu_coregroup_mask(cpu); + return coregroup_enabled; } -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_mc_mask, SD_INIT_NAME(MC) }, - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - static int __init init_big_cores(void) { int cpu; @@ -1083,9 +1085,32 @@ static int __init init_big_cores(void) return 0; } +/* + * die_mask and die_id are only available on systems which support + * multiple coregroups within a same package. On all other systems, die_mask + * would be same as package mask and die_id would be set to -1. + */ +const struct cpumask *cpu_die_mask(int cpu) +{ + if (has_coregroup_support()) + return per_cpu(cpu_coregroup_map, cpu); + else + return cpu_node_mask(cpu); +} +EXPORT_SYMBOL_GPL(cpu_die_mask); + +int cpu_die_id(int cpu) +{ + if (has_coregroup_support()) + return cpu_to_coregroup_id(cpu); + else + return -1; +} +EXPORT_SYMBOL_GPL(cpu_die_id); + void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned int cpu; + unsigned int cpu, num_threads; DBG("smp_prepare_cpus\n"); @@ -1152,9 +1177,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (smp_ops && smp_ops->probe) smp_ops->probe(); + + // Initalise the generic SMT topology support + num_threads = 1; + if (smt_enabled_at_boot) + num_threads = smt_enabled_at_boot; + cpu_smt_set_num_threads(num_threads, threads_per_core); } -void smp_prepare_boot_cpu(void) +void __init smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); #ifdef CONFIG_PPC64 @@ -1174,8 +1205,8 @@ int generic_cpu_disable(void) return -EBUSY; set_cpu_online(cpu, false); -#ifdef CONFIG_PPC64 - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; #endif /* Update affinity of all IRQs previously aimed at this CPU */ irq_migrate_all_off_this_cpu(); @@ -1435,7 +1466,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) return false; } - cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu)); + cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu)); /* Update l2-cache mask with all the CPUs that are part of submask */ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask); @@ -1525,7 +1556,7 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask) return; } - cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu)); + cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu)); /* Update coregroup mask with all the CPUs that are part of submask */ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask); @@ -1555,7 +1586,7 @@ static void add_cpu_to_masks(int cpu) /* * This CPU will not be in the online mask yet so we need to manually - * add it to it's own thread sibling mask. + * add it to its own thread sibling mask. */ map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); @@ -1586,9 +1617,9 @@ static void add_cpu_to_masks(int cpu) /* Skip all CPUs already part of current CPU core mask */ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); - /* If chip_id is -1; limit the cpu_core_mask to within DIE*/ + /* If chip_id is -1; limit the cpu_core_mask to within PKG */ if (chip_id == -1) - cpumask_and(mask, mask, cpu_cpu_mask(cpu)); + cpumask_and(mask, mask, cpu_node_mask(cpu)); for_each_cpu(i, mask) { if (chip_id == cpu_to_chip_id(i)) { @@ -1603,6 +1634,7 @@ static void add_cpu_to_masks(int cpu) } /* Activate a secondary processor. */ +__no_stack_protector void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); @@ -1611,12 +1643,15 @@ void start_secondary(void *unused) if (IS_ENABLED(CONFIG_PPC32)) setup_kup(); - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; + VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm))); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + inc_mm_active_cpus(&init_mm); smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); - rcu_cpu_starting(cpu); + rcutree_report_cpu_starting(cpu); cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) @@ -1626,10 +1661,12 @@ void start_secondary(void *unused) secondary_cpu_time_init(); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG if (system_state == SYSTEM_RUNNING) - vdso_data->processorCount++; + systemcfg->processorCount++; +#endif +#ifdef CONFIG_PPC64 vdso_getcpu_init(); #endif set_numa_node(numa_cpu_lookup_table[cpu]); @@ -1669,43 +1706,40 @@ void start_secondary(void *unused) BUG(); } -static void __init fixup_topology(void) +static struct sched_domain_topology_level powerpc_topology[6]; + +static void __init build_sched_topology(void) { - int i; + int i = 0; + + if (is_shared_processor() && has_big_cores) + static_branch_enable(&splpar_asym_pack); #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[smt_idx].mask = smallcore_smt_mask; + powerpc_topology[i++] = + SDTL_INIT(tl_smallcore_smt_mask, powerpc_smt_flags, SMT); + } else { + powerpc_topology[i++] = SDTL_INIT(tl_smt_mask, powerpc_smt_flags, SMT); } #endif + if (shared_caches) { + powerpc_topology[i++] = + SDTL_INIT(tl_cache_mask, powerpc_shared_cache_flags, CACHE); + } - if (!has_coregroup_support()) - powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; - - /* - * Try to consolidate topology levels here instead of - * allowing scheduler to degenerate. - * - Dont consolidate if masks are different. - * - Dont consolidate if sd_flags exists and are different. - */ - for (i = 1; i <= die_idx; i++) { - if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) - continue; + if (has_coregroup_support()) { + powerpc_topology[i++] = + SDTL_INIT(tl_mc_mask, powerpc_shared_proc_flags, MC); + } - if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && - powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) - continue; + powerpc_topology[i++] = SDTL_INIT(tl_pkg_mask, powerpc_shared_proc_flags, PKG); - if (!powerpc_topology[i - 1].sd_flags) - powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; + /* There must be one trailing NULL entry left. */ + BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1); - powerpc_topology[i].mask = powerpc_topology[i + 1].mask; - powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; -#ifdef CONFIG_SCHED_DEBUG - powerpc_topology[i].name = powerpc_topology[i + 1].name; -#endif - } + set_sched_topology(powerpc_topology); } void __init smp_cpus_done(unsigned int max_cpus) @@ -1720,9 +1754,20 @@ void __init smp_cpus_done(unsigned int max_cpus) smp_ops->bringup_done(); dump_numa_cpu_topology(); + build_sched_topology(); +} - fixup_topology(); - set_sched_topology(powerpc_topology); +/* + * For asym packing, by default lower numbered CPU has higher priority. + * On shared processors, pack to lower numbered core. However avoid moving + * between thread_groups within the same core. + */ +int arch_asym_cpu_priority(int cpu) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return -cpu / threads_per_core; + + return -cpu; } #ifdef CONFIG_HOTPLUG_CPU @@ -1748,11 +1793,19 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { + /* + * This could perhaps be a generic call in idlea_task_dead(), but + * that requires testing from all archs, so first put it here to + */ + VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm))); + dec_mm_active_cpus(&init_mm); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (smp_ops->cpu_die) smp_ops->cpu_die(cpu); } -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { /* * Disable on the down path. This will be re-enabled by diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 5de8597eaab8..90882b5175cd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -21,6 +21,7 @@ #include <asm/processor.h> #include <linux/ftrace.h> #include <asm/kprobes.h> +#include <linux/rethook.h> #include <asm/paca.h> @@ -73,29 +74,12 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum bool firstframe; stack_end = stack_page + THREAD_SIZE; - if (!is_idle_task(task)) { - /* - * For user tasks, this is the SP value loaded on - * kernel entry, see "PACAKSAVE(r13)" in _switch() and - * system_call_common(). - * - * Likewise for non-swapper kernel threads, - * this also happens to be the top of the stack - * as setup by copy_thread(). - * - * Note that stack backlinks are not properly setup by - * copy_thread() and thus, a forked task() will have - * an unreliable stack trace until it's been - * _switch()'ed to for the first time. - */ - stack_end -= STACK_USER_INT_FRAME_SIZE; - } else { - /* - * idle tasks have a custom stack layout, - * c.f. cpu_idle_thread_init(). - */ + + // See copy_thread() for details. + if (task->flags & PF_KTHREAD) stack_end -= STACK_FRAME_MIN_SIZE; - } + else + stack_end -= STACK_USER_INT_FRAME_SIZE; if (task == current) sp = current_stack_frame(); @@ -150,12 +134,13 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * arch-dependent code, they are generic. */ ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); -#ifdef CONFIG_KPROBES + /* * Mark stacktraces with kretprobed functions on them * as unreliable. */ - if (ip == (unsigned long)__kretprobe_trampoline) +#ifdef CONFIG_RETHOOK + if (ip == (unsigned long)arch_rethook_trampoline) return -EINVAL; #endif @@ -221,8 +206,8 @@ static void raise_backtrace_ipi(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi); } #endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */ diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 863a7aa24650..ec3101f95e53 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -2,32 +2,60 @@ #include <linux/memory.h> #include <linux/static_call.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { int err; bool is_ret0 = (func == __static_call_return0); - unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func); - bool is_short = is_offset_in_branch_range((long)target - (long)tramp); - - if (!tramp) - return; + unsigned long _tramp = (unsigned long)tramp; + unsigned long _func = (unsigned long)func; + unsigned long _ret0 = _tramp + PPC_SCT_RET0; + bool is_short = is_offset_in_branch_range((long)func - (long)(site ? : tramp)); mutex_lock(&text_mutex); - if (func && !is_short) { - err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target)); - if (err) - goto out; + if (site && tail) { + if (!func) + err = patch_instruction(site, ppc_inst(PPC_RAW_BLR())); + else if (is_ret0) + err = patch_branch(site, _ret0, 0); + else if (is_short) + err = patch_branch(site, _func, 0); + else if (tramp) + err = patch_branch(site, _tramp, 0); + else + err = 0; + } else if (site) { + if (!func) + err = patch_instruction(site, ppc_inst(PPC_RAW_NOP())); + else if (is_ret0) + err = patch_instruction(site, ppc_inst(PPC_RAW_LI(_R3, 0))); + else if (is_short) + err = patch_branch(site, _func, BRANCH_SET_LINK); + else if (tramp) + err = patch_branch(site, _tramp, BRANCH_SET_LINK); + else + err = 0; + } else if (tramp) { + if (func && !is_short) { + err = patch_ulong(tramp + PPC_SCT_DATA, _func); + if (err) + goto out; + } + + if (!func) + err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); + else if (is_ret0) + err = patch_branch(tramp, _ret0, 0); + else if (is_short) + err = patch_branch(tramp, _func, 0); + else + err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); + } else { + err = 0; } - if (!func) - err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); - else if (is_short) - err = patch_branch(tramp, target, 0); - else - err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); out: mutex_unlock(&text_mutex); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/switch.S index 1bf1121e17f1..59e3ee99db0e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/switch.S @@ -1,56 +1,21 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). - * - * This file contains the system call entry code, context switch - * code, and exception/interrupt return code for PowerPC. - */ - #include <linux/objtool.h> -#include <linux/errno.h> -#include <linux/err.h> -#include <asm/cache.h> -#include <asm/unistd.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/thread_info.h> +#include <asm/asm-offsets.h> #include <asm/code-patching-asm.h> +#include <asm/mmu.h> #include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cputable.h> -#include <asm/firmware.h> -#include <asm/bug.h> -#include <asm/ptrace.h> -#include <asm/irqflags.h> -#include <asm/hw_irq.h> -#include <asm/context_tracking.h> -#include <asm/ppc-opcode.h> -#include <asm/barrier.h> -#include <asm/export.h> -#include <asm/asm-compat.h> -#ifdef CONFIG_PPC_BOOK3S -#include <asm/exception-64s.h> -#else -#include <asm/exception-64e.h> -#endif -#include <asm/feature-fixups.h> #include <asm/kup.h> +#include <asm/thread_info.h> -/* - * System calls. - */ - .section ".text" +.section ".text","ax",@progbits #ifdef CONFIG_PPC_BOOK3S_64 +/* + * Cancel all explict user streams as they will have no use after context + * switch and will stop the HW from creating streams itself + */ +#define STOP_STREAMS \ + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6) #define FLUSH_COUNT_CACHE \ 1: nop; \ @@ -74,7 +39,6 @@ flush_branch_caches: // Flush the link stack .rept 64 - ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr b 1f @@ -105,87 +69,13 @@ flush_branch_caches: .endr blr -#else -#define FLUSH_COUNT_CACHE -#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_64S_HASH_MMU +.balign 32 /* - * This routine switches between two different tasks. The process - * state of one is saved on its kernel stack. Then the state - * of the other is restored from its kernel stack. The memory - * management hardware is updated to the second process's state. - * Finally, we can return to the second process, via interrupt_return. - * On entry, r3 points to the THREAD for the current task, r4 - * points to the THREAD for the new task. - * - * Note: there are two ways to get to the "going out" portion - * of this code; either by coming in via the entry (_switch) - * or via "fork" which must set up an environment equivalent - * to the "_switch" path. If you change this you'll have to change - * the fork code also. - * - * The code which creates the new task context is in 'copy_thread' - * in arch/powerpc/kernel/process.c + * New stack pointer in r8, old stack pointer in r1, must not clobber r3 */ - .align 7 -_GLOBAL(_switch) - mflr r0 - std r0,16(r1) - stdu r1,-SWITCH_FRAME_SIZE(r1) - /* r3-r13 are caller saved -- Cort */ - SAVE_NVGPRS(r1) - std r0,_NIP(r1) /* Return to switch caller */ - mfcr r23 - std r23,_CCR(r1) - std r1,KSP(r3) /* Set old stack pointer */ - - kuap_check_amr r9, r10 - - FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ - - /* - * On SMP kernels, care must be taken because a task may be - * scheduled off CPUx and on to CPUy. Memory ordering must be - * considered. - * - * Cacheable stores on CPUx will be visible when the task is - * scheduled on CPUy by virtue of the core scheduler barriers - * (see "Notes on Program-Order guarantees on SMP systems." in - * kernel/sched/core.c). - * - * Uncacheable stores in the case of involuntary preemption must - * be taken care of. The smp_mb__after_spinlock() in __schedule() - * is implemented as hwsync on powerpc, which orders MMIO too. So - * long as there is an hwsync in the context switch path, it will - * be executed on the source CPU after the task has performed - * all MMIO ops on that CPU, and on the destination CPU before the - * task performs any MMIO ops there. - */ - - /* - * The kernel context switch path must contain a spin_lock, - * which contains larx/stcx, which will clear any reservation - * of the task being switched. - */ -#ifdef CONFIG_PPC_BOOK3S -/* Cancel all explict user streams as they will have no use after context - * switch and will stop the HW from creating streams itself - */ - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6) -#endif - - addi r6,r4,-THREAD /* Convert THREAD to 'current' */ - std r6,PACACURRENT(r13) /* Set new 'current' */ -#if defined(CONFIG_STACKPROTECTOR) - ld r6, TASK_CANARY(r6) - std r6, PACA_CANARY(r13) -#endif - - ld r8,KSP(r4) /* new stack pointer */ -#ifdef CONFIG_PPC_64S_HASH_MMU -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) +pin_stack_slb: BEGIN_FTR_SECTION clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ @@ -233,15 +123,57 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) slbmte r7,r0 isync -2: +2: blr + .size pin_stack_slb,.-pin_stack_slb #endif /* CONFIG_PPC_64S_HASH_MMU */ - clrrdi r7, r8, THREAD_SHIFT /* base of new stack */ - /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE - because we don't need to leave the 288-byte ABI gap at the - top of the kernel stack. */ +#else +#define STOP_STREAMS +#define FLUSH_COUNT_CACHE +#endif /* CONFIG_PPC_BOOK3S_64 */ + +/* + * do_switch_32/64 have the same calling convention as _switch, i.e., r3,r4 + * are prev and next thread_struct *, and returns prev task_struct * in r3. + + * This switches the stack, current, and does other task switch housekeeping. + */ +.macro do_switch_32 + tophys(r0,r4) + mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ + lwz r1,KSP(r4) /* Load new stack pointer */ + + /* save the old current 'last' for return value */ + mr r3,r2 + addi r2,r4,-THREAD /* Update current */ +.endm + +.macro do_switch_64 + ld r8,KSP(r4) /* Load new stack pointer */ + + kuap_check_amr r9, r10 + + FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ + + STOP_STREAMS /* Clobbers r6 */ + + addi r3,r3,-THREAD /* old thread -> task_struct for return value */ + addi r6,r4,-THREAD /* new thread -> task_struct */ + std r6,PACACURRENT(r13) /* Set new task_struct to 'current' */ +#if defined(CONFIG_STACKPROTECTOR) + ld r6, TASK_CANARY(r6) + std r6, PACA_CANARY(r13) +#endif + /* Set new PACAKSAVE */ + clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE + std r7,PACAKSAVE(r13) +#ifdef CONFIG_PPC_64S_HASH_MMU +BEGIN_MMU_FTR_SECTION + bl pin_stack_slb +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) +#endif /* * PMU interrupts in radix may come in here. They will use r1, not * PACAKSAVE, so this stack switch will not cause a problem. They @@ -251,81 +183,75 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * active on the new CPU, will order those stores. */ mr r1,r8 /* start using new stack pointer */ - std r7,PACAKSAVE(r13) +.endm + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * This routine is always called with interrupts disabled. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this , you'll have to + * change the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/ppc/kernel/process.c + * + * Note: this uses SWITCH_FRAME_SIZE rather than USER_INT_FRAME_SIZE + * because we don't need to leave the redzone ABI gap at the top of + * the kernel stack. + */ +_GLOBAL(_switch) + PPC_CREATE_STACK_FRAME(SWITCH_FRAME_SIZE) + PPC_STL r1,KSP(r3) /* Set old stack pointer */ + SAVE_NVGPRS(r1) /* volatiles are caller-saved -- Cort */ + PPC_STL r0,_NIP(r1) /* Return to switch caller */ + mfcr r0 + stw r0,_CCR(r1) - ld r6,_CCR(r1) - mtcrf 0xFF,r6 + /* + * On SMP kernels, care must be taken because a task may be + * scheduled off CPUx and on to CPUy. Memory ordering must be + * considered. + * + * Cacheable stores on CPUx will be visible when the task is + * scheduled on CPUy by virtue of the core scheduler barriers + * (see "Notes on Program-Order guarantees on SMP systems." in + * kernel/sched/core.c). + * + * Uncacheable stores in the case of involuntary preemption must + * be taken care of. The smp_mb__after_spinlock() in __schedule() + * is implemented as hwsync on powerpc, which orders MMIO too. So + * long as there is an hwsync in the context switch path, it will + * be executed on the source CPU after the task has performed + * all MMIO ops on that CPU, and on the destination CPU before the + * task performs any MMIO ops there. + */ + + /* + * The kernel context switch path must contain a spin_lock, + * which contains larx/stcx, which will clear any reservation + * of the task being switched. + */ - /* r3-r13 are destroyed -- Cort */ - REST_NVGPRS(r1) +#ifdef CONFIG_PPC32 + do_switch_32 +#else + do_switch_64 +#endif - /* convert old thread to its task_struct for return value */ - addi r3,r3,-THREAD - ld r7,_NIP(r1) /* Return to _switch caller in new task */ - mtlr r7 + lwz r0,_CCR(r1) + mtcrf 0xFF,r0 + REST_NVGPRS(r1) /* volatiles are destroyed -- Cort */ + PPC_LL r0,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r0 addi r1,r1,SWITCH_FRAME_SIZE blr - -_GLOBAL(enter_prom) - mflr r0 - std r0,16(r1) - stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */ - - /* Because PROM is running in 32b mode, it clobbers the high order half - * of all registers that it saves. We therefore save those registers - * PROM might touch to the stack. (r0, r3-r13 are caller saved) - */ - SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_NVGPRS(r1) - mfcr r10 - mfmsr r11 - std r10,_CCR(r1) - std r11,_MSR(r1) - - /* Put PROM address in SRR0 */ - mtsrr0 r4 - - /* Setup our trampoline return addr in LR */ - bcl 20,31,$+4 -0: mflr r4 - addi r4,r4,(1f - 0b) - mtlr r4 - - /* Prepare a 32-bit mode big endian MSR - */ -#ifdef CONFIG_PPC_BOOK3E_64 - rlwinm r11,r11,0,1,31 - mtsrr1 r11 - rfi -#else /* CONFIG_PPC_BOOK3E_64 */ - LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) - andc r11,r11,r12 - mtsrr1 r11 - RFI_TO_KERNEL -#endif /* CONFIG_PPC_BOOK3E_64 */ - -1: /* Return from OF */ - FIXUP_ENDIAN - - /* Just make sure that r1 top 32 bits didn't get - * corrupt by OF - */ - rldicl r1,r1,0,32 - - /* Restore the MSR (back to 64 bits) */ - ld r0,_MSR(r1) - MTMSRD(r0) - isync - - /* Restore other registers */ - REST_GPR(2, r1) - REST_GPR(13, r1) - REST_NVGPRS(r1) - ld r4,_CCR(r1) - mtcr r4 - - addi r1,r1,SWITCH_FRAME_SIZE - ld r0,16(r1) - mtlr r0 - blr diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 16ee3baaf09a..50fa8fc9ef95 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -11,6 +11,8 @@ #include <linux/interrupt.h> #include <linux/nmi.h> +void do_after_copyback(void); + void do_after_copyback(void) { iommu_restore(); diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 18b9d325395f..be159ad4b77b 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -27,11 +27,11 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) trace_hardirqs_off(); /* finish reconciling */ - CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + CT_WARN_ON(ct_state() == CT_STATE_KERNEL); user_exit_irqoff(); BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(!user_mode(regs)); BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY @@ -46,7 +46,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) iamr = mfspr(SPRN_IAMR); regs->amr = amr; regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + if (mmu_has_feature(MMU_FTR_KUAP)) { mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); flush_needed = true; } diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index a0be127475b1..ec4458cdb97b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,8 +296,9 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group -235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create 237 common epoll_ctl sys_epoll_ctl 238 common epoll_wait sys_epoll_wait @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive @@ -537,3 +541,23 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_ni_syscall +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ef9a61718940..6b3dd6decdf9 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/hvcall.h> #include <asm/machdep.h> #include <asm/smp.h> +#include <asm/time.h> #include <asm/pmc.h> #include <asm/firmware.h> #include <asm/idle.h> @@ -139,7 +140,7 @@ static unsigned long dscr_default; * @val: Returned cpu specific DSCR default value * * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void read_dscr(void *val) { @@ -152,7 +153,7 @@ static void read_dscr(void *val) * @val: New cpu specific DSCR default value to update * * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void write_dscr(void *val) { @@ -217,13 +218,18 @@ static DEVICE_ATTR(dscr_default, 0600, static void __init sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { + struct device *dev_root; int cpu; dscr_default = spr_default_dscr; for_each_possible_cpu(cpu) paca_ptrs[cpu]->dscr_default = dscr_default; - device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + device_create_file(dev_root, &dev_attr_dscr_default); + put_device(dev_root); + } } } #endif /* CONFIG_PPC64 */ @@ -746,7 +752,12 @@ static DEVICE_ATTR(svm, 0444, show_svm, NULL); static void __init create_svm_file(void) { - device_create_file(cpu_subsys.dev_root, &dev_attr_svm); + struct device *dev_root = bus_get_dev_root(&cpu_subsys); + + if (dev_root) { + device_create_file(dev_root, &dev_attr_svm); + put_device(dev_root); + } } #else static void __init create_svm_file(void) diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 828d0f4106d2..cba6dd15de3b 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -200,7 +200,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); + tau_workq = alloc_ordered_workqueue("tau", 0); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index d68de3618741..4bbeb8644d3d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -71,11 +71,11 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/mce.h> +#include <asm/systemcfg.h> /* powerpc clocksource/clockevent code */ #include <linux/clockchips.h> -#include <linux/timekeeper_internal.h> static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { @@ -137,7 +137,7 @@ EXPORT_SYMBOL_GPL(rtc_lock); static u64 tb_to_ns_scale __read_mostly; static unsigned tb_to_ns_shift __read_mostly; -static u64 boot_tb __read_mostly; +static u64 boot_tb __ro_after_init; extern struct timezone sys_tz; static long timezone_offset; @@ -354,9 +354,31 @@ void vtime_flush(struct task_struct *tsk) acct->hardirq_time = 0; acct->softirq_time = 0; } + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void vtime_task_switch(struct task_struct *prev) +{ + if (is_idle_task(prev)) + vtime_account_idle(prev); + else + vtime_account_kernel(prev); + + vtime_flush(prev); + + if (!IS_ENABLED(CONFIG_PPC64)) { + struct cpu_accounting_data *acct = get_accounting(current); + struct cpu_accounting_data *acct0 = get_accounting(prev); + + acct->starttime = acct0->starttime; + } +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -void __delay(unsigned long loops) +void __no_kcsan __delay(unsigned long loops) { unsigned long start; @@ -377,7 +399,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -void udelay(unsigned long usecs) +void __no_kcsan udelay(unsigned long usecs) { __delay(tb_ticks_per_usec * usecs); } @@ -515,7 +537,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) } /* Conditionally hard-enable interrupts. */ - if (should_hard_irq_enable()) { + if (should_hard_irq_enable(regs)) { /* * Ensure a positive value is written to the decrementer, or * else some CPUs will continue to take decrementer exceptions. @@ -617,6 +639,12 @@ notrace unsigned long long sched_clock(void) return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } +#ifdef CONFIG_PPC_SPLPAR +u64 get_boot_tb(void) +{ + return boot_tb; +} +#endif #ifdef CONFIG_PPC_PSERIES @@ -673,7 +701,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE unsigned int tcr; /* Clear any pending timer interrupts */ @@ -879,6 +907,38 @@ void secondary_cpu_time_init(void) register_decrementer_clockevent(smp_processor_id()); } +/* + * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit + * result. + */ +static __init void div128_by_32(u64 dividend_high, u64 dividend_low, + unsigned int divisor, struct div_result *dr) +{ + unsigned long a, b, c, d; + unsigned long w, x, y, z; + u64 ra, rb, rc; + + a = dividend_high >> 32; + b = dividend_high & 0xffffffff; + c = dividend_low >> 32; + d = dividend_low & 0xffffffff; + + w = a / divisor; + ra = ((u64)(a - (w * divisor)) << 32) + b; + + rb = ((u64)do_div(ra, divisor) << 32) + c; + x = ra; + + rc = ((u64)do_div(rb, divisor) << 32) + d; + y = rb; + + do_div(rc, divisor); + z = rc; + + dr->result_high = ((u64)w << 32) + x; + dr->result_low = ((u64)y << 32) + z; +} + /* This function is only called on the boot processor */ void __init time_init(void) { @@ -887,7 +947,11 @@ void __init time_init(void) unsigned shift; /* Normal PowerPC with timebase register */ - ppc_md.calibrate_decr(); + if (ppc_md.calibrate_decr) + ppc_md.calibrate_decr(); + else + generic_calibrate_decr(); + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", @@ -924,7 +988,10 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_k_arch_data->tb_ticks_per_sec = tb_ticks_per_sec; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; +#endif /* initialise and enable the large decrementer (if we have one) */ set_decrementer_max(); @@ -945,39 +1012,6 @@ void __init time_init(void) enable_sched_clock_irqtime(); } -/* - * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit - * result. - */ -void div128_by_32(u64 dividend_high, u64 dividend_low, - unsigned divisor, struct div_result *dr) -{ - unsigned long a, b, c, d; - unsigned long w, x, y, z; - u64 ra, rb, rc; - - a = dividend_high >> 32; - b = dividend_high & 0xffffffff; - c = dividend_low >> 32; - d = dividend_low & 0xffffffff; - - w = a / divisor; - ra = ((u64)(a - (w * divisor)) << 32) + b; - - rb = ((u64) do_div(ra, divisor) << 32) + c; - x = ra; - - rc = ((u64) do_div(rb, divisor) << 32) + d; - y = rb; - - do_div(rc, divisor); - z = rc; - - dr->result_high = ((u64)w << 32) + x; - dr->result_low = ((u64)y << 32) + z; - -} - /* We don't need to calibrate delay, we use the CPU timebase for that */ void calibrate_delay(void) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 9feab5e0485b..a9cd6507163a 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -6,13 +6,13 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. */ +#include <linux/export.h> #include <asm/asm-offsets.h> #include <asm/ppc_asm.h> #include <asm/ppc-opcode.h> #include <asm/ptrace.h> #include <asm/reg.h> #include <asm/bug.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #ifdef CONFIG_VSX diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index af8527538fe4..d6c3885453bd 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -6,15 +6,18 @@ ifdef CONFIG_FUNCTION_TRACER # do not trace tracer code CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y += ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o +obj64-y += ftrace.o ftrace_entry.o +endif endif -obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o ftrace.o + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) @@ -23,4 +26,9 @@ obj-$(CONFIG_PPC32) += $(obj32-y) # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_ftrace.o := n KCOV_INSTRUMENT_ftrace.o := n +KCSAN_SANITIZE_ftrace.o := n UBSAN_SANITIZE_ftrace.o := n +GCOV_PROFILE_ftrace_64_pg.o := n +KCOV_INSTRUMENT_ftrace_64_pg.o := n +KCSAN_SANITIZE_ftrace_64_pg.o := n +UBSAN_SANITIZE_ftrace_64_pg.o := n diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7b85c3b460a3..841d077e2825 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -23,206 +23,85 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> +#include <asm/sections.h> -/* - * We generally only have a single long_branch tramp and at most 2 or 3 plt - * tramps generated. But, we don't use the plt tramps currently. We also allot - * 2 tramps after .text and .init.text. So, we only end up with around 3 usable - * tramps in total. Set aside 8 just to be sure. - */ -#define NUM_FTRACE_TRAMPS 8 +#define NUM_FTRACE_TRAMPS 2 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static ppc_inst_t -ftrace_call_replace(unsigned long ip, unsigned long addr, int link) +unsigned long ftrace_call_adjust(unsigned long addr) { - ppc_inst_t op; + if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) + return 0; - addr = ppc_function_entry((void *)addr); + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } - /* if (link) set op to 'bl' else 'b' */ + return addr; +} + +static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link) +{ + ppc_inst_t op; + + WARN_ON(!is_offset_in_branch_range(addr - ip)); create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0); return op; } -static inline int -ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op) { - ppc_inst_t replaced; - - /* - * Note: - * We are paranoid about modifying text, as if a bug was to happen, it - * could cause us to read or write to someplace that could cause harm. - * Carefully read and modify the code with probe_kernel_*(), and make - * sure what we read is what we expected it to be before modifying it. - */ - - /* read the text we want to modify */ - if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) + if (copy_inst_from_kernel_nofault(op, (void *)ip)) { + pr_err("0x%lx: fetching instruction failed\n", ip); return -EFAULT; - - /* Make sure it is what we expect it to be */ - if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, - ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); - return -EINVAL; } - /* replace the text with the new text */ - return patch_instruction((u32 *)ip, new); + return 0; } -/* - * Helper functions that are the same for both PPC64 and PPC32. - */ -static int test_24bit_addr(unsigned long ip, unsigned long addr) +static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst) { - addr = ppc_function_entry((void *)addr); - - return is_offset_in_branch_range(addr - ip); -} + ppc_inst_t op; + int ret; -static int is_bl_op(ppc_inst_t op) -{ - return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); -} + ret = ftrace_read_inst(ip, &op); + if (!ret && !ppc_inst_equal(op, inst)) { + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", + ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(op)); + ret = -EINVAL; + } -static int is_b_op(ppc_inst_t op) -{ - return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0); + return ret; } -static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) +static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) { - int offset; + int ret = ftrace_validate_inst(ip, old); - offset = PPC_LI(ppc_inst_val(op)); - /* make it signed */ - if (offset & 0x02000000) - offset |= 0xfe000000; + if (!ret && !ppc_inst_equal(old, new)) + ret = patch_instruction((u32 *)ip, new); - return ip + (long)offset; + return ret; } -#ifdef CONFIG_MODULES -static int -__ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long entry, ptr, tramp; - unsigned long ip = rec->ip; - ppc_inst_t op, pop; - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { - if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && - !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %08lx around bl _mcount\n", - ppc_inst_as_ulong(op)); - return -EINVAL; - } - } else if (IS_ENABLED(CONFIG_PPC64)) { - /* - * Check what is in the next instruction. We can see ld r2,40(r1), but - * on first pass after boot we will see mflr r0. - */ - if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { - pr_err("Fetching op failed.\n"); - return -EFAULT; - } - - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, - ppc_inst_as_ulong(op)); - return -EINVAL; - } - } - - /* - * When using -mprofile-kernel or PPC32 there is no load to jump over. - * - * Otherwise our original call site looks like: - * - * bl <tramp> - * ld r2,XX(r1) - * - * Milton Miller pointed out that we can not simply nop the branch. - * If a task was preempted when calling a trace function, the nops - * will remove the way to restore the TOC in r2 and the r2 TOC will - * get corrupted. - * - * Use a b +8 to jump over the load. - */ - if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) - pop = ppc_inst(PPC_RAW_NOP()); - else - pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */ - - if (patch_instruction((u32 *)ip, pop)) { - pr_err("Patching NOP failed.\n"); - return -EPERM; - } - - return 0; -} -#else -static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +static int is_bl_op(ppc_inst_t op) { - return 0; + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); } -#endif /* CONFIG_MODULES */ static unsigned long find_ftrace_tramp(unsigned long ip) { int i; - /* - * We have the compiler generated long_branch tramps at the end - * and we prefer those - */ - for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) if (!ftrace_tramps[i]) continue; else if (is_offset_in_branch_range(ftrace_tramps[i] - ip)) @@ -231,449 +110,424 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } -static int add_ftrace_tramp(unsigned long tramp) +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) { - int i; + struct module *mod = NULL; - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) { - ftrace_tramps[i] = tramp; - return 0; - } + scoped_guard(rcu) + mod = __module_text_address(ip); + if (!mod) + pr_err("No module loaded at addr=%lx\n", ip); - return -1; + return (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs); } - -/* - * If this is a compiler generated long_branch trampoline (essentially, a - * trampoline that has a branch to _mcount()), we re-write the branch to - * instead go to ftrace_[regs_]caller() and note down the location of this - * trampoline. - */ -static int setup_mcount_compiler_tramp(unsigned long tramp) +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) { - int i; - ppc_inst_t op; - unsigned long ptr; - - /* Is this a known long jump tramp? */ - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (ftrace_tramps[i] == tramp) - return 0; - - /* New trampoline -- read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { - pr_debug("Fetching opcode failed.\n"); - return -1; - } + return 0; +} +#endif - /* Is this a 24 bit branch? */ - if (!is_b_op(op)) { - pr_debug("Trampoline is not a long branch tramp.\n"); - return -1; - } +static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + return rec->arch.ool_stub; +#else + BUILD_BUG(); +#endif +} - /* lets find where the pointer goes */ - ptr = find_bl_target(tramp, op); +static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) +{ + unsigned long ip; + unsigned long stub; - if (ptr != ppc_global_function_entry((void *)_mcount)) { - pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); - return -1; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + else + ip = rec->ip; + + if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && + addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", + ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; } - /* Let's re-write the tramp to go to ftrace_[regs_]caller */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - ptr = ppc_global_function_entry((void *)ftrace_regs_caller); + if (is_offset_in_branch_range(addr - ip)) + /* Within range */ + stub = addr; + else if (core_kernel_text(ip)) + /* We would be branching to one of our ftrace stubs */ + stub = find_ftrace_tramp(ip); else - ptr = ppc_global_function_entry((void *)ftrace_caller); + stub = ftrace_lookup_module_stub(ip, addr); - if (patch_branch((u32 *)tramp, ptr, 0)) { - pr_debug("REL24 out of range!\n"); - return -1; - } - - if (add_ftrace_tramp(tramp)) { - pr_debug("No tramp locations left\n"); - return -1; + if (!stub) { + pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip); + return -EINVAL; } + *call_inst = ftrace_create_branch_inst(ip, stub, 1); return 0; } -static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) +static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) { - unsigned long tramp, ip = rec->ip; - ppc_inst_t op; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; + int ret = 0, ool_stub_count, *ool_stub_index; + ppc_inst_t inst; + /* + * See ftrace_entry.S if changing the below instruction sequence, as we rely on + * decoding the last branch instruction here to recover the correct function ip. + */ + struct ftrace_ool_stub *ool_stub, ool_stub_template = { + .insn = { + PPC_RAW_MFLR(_R0), + PPC_RAW_NOP(), /* bl ftrace_caller */ + PPC_RAW_MTLR(_R0), + PPC_RAW_NOP() /* b rec->ip + 4 */ + } + }; - /* Read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } + WARN_ON(rec->arch.ool_stub); - /* Make sure that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + if (is_kernel_inittext(rec->ip)) { + ool_stub = ftrace_ool_stub_inittext; + ool_stub_index = &ool_stub_inittext_index; + ool_stub_count = ftrace_ool_stub_inittext_count; + } else if (is_kernel_text(rec->ip)) { + /* + * ftrace records are sorted, so we first use up the stub area within .text + * (ftrace_ool_stub_text) before using the area at the end of .text + * (ftrace_ool_stub_text_end), unless the stub is out of range of the record. + */ + if (ool_stub_text_index >= ftrace_ool_stub_text_count || + !is_offset_in_branch_range((long)rec->ip - + (long)&ftrace_ool_stub_text[ool_stub_text_index])) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; + } else { + ool_stub = ftrace_ool_stub_text; + ool_stub_index = &ool_stub_text_index; + ool_stub_count = ftrace_ool_stub_text_count; + } +#ifdef CONFIG_MODULES + } else if (mod) { + ool_stub = mod->arch.ool_stubs; + ool_stub_index = &mod->arch.ool_stub_index; + ool_stub_count = mod->arch.ool_stub_count; +#endif + } else { return -EINVAL; } - /* Let's find where the pointer goes */ - tramp = find_bl_target(ip, op); + ool_stub += (*ool_stub_index)++; - pr_devel("ip:%lx jumps to %lx", ip, tramp); + if (WARN_ON(*ool_stub_index > ool_stub_count)) + return -EINVAL; - if (setup_mcount_compiler_tramp(tramp)) { - /* Are other trampolines reachable? */ - if (!find_ftrace_tramp(ip)) { - pr_err("No ftrace trampolines reachable from %ps\n", - (void *)ip); - return -EINVAL; - } + if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) || + !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) - + (long)&ool_stub->insn[3])) { + pr_err("%s: ftrace ool stub out of range (%p -> %p).\n", + __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]); + return -EINVAL; } - if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Patching NOP failed.\n"); - return -EPERM; - } + rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0]; - return 0; + /* bl ftrace_caller */ + if (!mod) + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst); +#ifdef CONFIG_MODULES + else + /* + * We can't use ftrace_get_call_inst() since that uses + * __module_text_address(rec->ip) to look up the module. + * But, since the module is not fully formed at this stage, + * the lookup fails. We know the target though, so generate + * the branch inst directly. + */ + inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE, + mod->arch.tramp, 1); +#endif + ool_stub_template.insn[1] = ppc_inst_val(inst); + + /* b rec->ip + 4 */ + if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0)) + return -EINVAL; + ool_stub_template.insn[3] = ppc_inst_val(inst); + + if (!ret) + ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template, + sizeof(ool_stub_template), false); + + return ret; +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + BUILD_BUG(); +#endif } -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec) { - unsigned long ip = rec->ip; - ppc_inst_t old, new; + const struct ftrace_ops *ops = NULL; - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_RAW_NOP()); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - return __ftrace_make_nop_kernel(rec, addr); - } else if (!IS_ENABLED(CONFIG_MODULES)) { - return -EINVAL; + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); } - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; + if (!ops) + ops = &ftrace_list_ops; - return __ftrace_make_nop(mod, rec, addr); + return ops; } -#ifdef CONFIG_MODULES -/* - * Examine the existing instructions for __ftrace_make_call. - * They should effectively be a NOP, and follow formal constraints, - * depending on the ABI. Return false if they don't. - */ -static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) +static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops) { - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return patch_ulong((void *)(ftrace_get_ool_stub(rec) - sizeof(unsigned long)), + (unsigned long)ops); else - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && - ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); + return patch_ulong((void *)(rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long)), + (unsigned long)ops); } -static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { - ppc_inst_t op[2]; - void *ip = (void *)rec->ip; - unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(op, ip)) - return -EFAULT; - - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && - copy_inst_from_kernel_nofault(op + 1, ip + 4)) - return -EFAULT; - - if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, - ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); - return -EINVAL; - } - - /* If we never set up ftrace trampoline(s), then bail */ - if (!mod->arch.tramp || - (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) { - pr_err("No ftrace trampoline\n"); - return -EINVAL; - } - - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS) - tramp = mod->arch.tramp_regs; - else - tramp = mod->arch.tramp; - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} - return 0; +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec)); } #else -static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - return 0; + /* This should never be called since we override ftrace_replace_code() */ + WARN_ON(1); + return -EINVAL; } -#endif /* CONFIG_MODULES */ +#endif -static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - ppc_inst_t op; - void *ip = (void *)rec->ip; - unsigned long tramp, entry, ptr; - - /* Make sure we're being asked to patch branch to a known ftrace addr */ - entry = ppc_global_function_entry((void *)ftrace_caller); - ptr = ppc_global_function_entry((void *)addr); - - if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - entry = ppc_global_function_entry((void *)ftrace_regs_caller); + ppc_inst_t old, new; + unsigned long ip = rec->ip; + int ret = 0; - if (ptr != entry) { - pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); + /* This can only ever be called during module load */ + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip))) return -EINVAL; - } - /* Make sure we have a nop */ - if (copy_inst_from_kernel_nofault(&op, ip)) { - pr_err("Unable to read ftrace location %p\n", ip); - return -EFAULT; + old = ppc_inst(PPC_RAW_NOP()); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old); } - if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Unexpected call sequence at %p: %08lx\n", - ip, ppc_inst_as_ulong(op)); - return -EINVAL; - } + ret |= ftrace_get_call_inst(rec, addr, &new); - tramp = find_ftrace_tramp((unsigned long)ip); - if (!tramp) { - pr_err("No ftrace trampolines reachable from %ps\n", ip); - return -EINVAL; - } + if (!ret) + ret = ftrace_modify_code(ip, old, new); - if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { - pr_err("Error patching branch to ftrace tramp!\n"); - return -EINVAL; - } + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; - return 0; + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), + ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); + + return ret; } -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned long ip = rec->ip; - ppc_inst_t old, new; - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. + * This should never be called since we override ftrace_replace_code(), + * as well as ftrace_init_nop() */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ppc_inst(PPC_RAW_NOP()); - new = ftrace_call_replace(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - return __ftrace_make_call_kernel(rec, addr); - } else if (!IS_ENABLED(CONFIG_MODULES)) { - /* We should not get here without modules */ - return -EINVAL; - } - - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - - return __ftrace_make_call(rec, addr); + WARN_ON(1); + return -EINVAL; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -#ifdef CONFIG_MODULES -static int -__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) +void ftrace_replace_code(int enable) { - ppc_inst_t op; - unsigned long ip = rec->ip; - unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + ppc_inst_t old, new, call_inst, new_call_inst; + ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP()); + unsigned long ip, new_addr, addr; + struct ftrace_rec_iter *iter; + struct dyn_ftrace *rec; + int ret = 0, update; - /* If we never set up ftrace trampolines, then bail */ - if (!mod->arch.tramp || !mod->arch.tramp_regs) { - pr_err("No ftrace trampoline\n"); - return -EINVAL; - } + for_ftrace_rec_iter(iter) { + rec = ftrace_rec_iter_record(iter); + ip = rec->ip; - /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - entry = ppc_global_function_entry((void *)old_addr); + if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED)) + continue; - pr_devel("ip:%lx jumps to %lx", ip, tramp); + addr = ftrace_get_addr_curr(rec); + new_addr = ftrace_get_addr_new(rec); + update = ftrace_update_record(rec, enable); - if (tramp != entry) { - /* old_addr is not within range, so we must have used a trampoline */ - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst); + if (ret) + goto out; } - pr_devel("trampoline target %lx", ptr); - - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; + switch (update) { + case FTRACE_UPDATE_IGNORE: + default: + continue; + case FTRACE_UPDATE_MODIFY_CALL: + ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); + ret |= ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); + old = call_inst; + new = new_call_inst; + break; + case FTRACE_UPDATE_MAKE_NOP: + ret = ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_set_nop_ops(rec); + old = call_inst; + new = nop_inst; + break; + case FTRACE_UPDATE_MAKE_CALL: + ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); + old = nop_inst; + new = call_inst; + break; } - } - /* The new target may be within range */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } + if (!ret) + ret = ftrace_modify_code(ip, old, new); - return 0; - } + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && + (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) { + /* Update the actual ftrace location */ + call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - + (long)rec->ip)); + nop_inst = ppc_inst(PPC_RAW_NOP()); + ip = rec->ip; - if (rec->flags & FTRACE_FL_REGS) - tramp = mod->arch.tramp_regs; - else - tramp = mod->arch.tramp; + if (update == FTRACE_UPDATE_MAKE_NOP) + ret = ftrace_modify_code(ip, call_inst, nop_inst); + else + ret = ftrace_modify_code(ip, nop_inst, call_inst); - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } + if (ret) + goto out; + } - if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; + if (ret) + goto out; } - return 0; -} -#else -static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) -{ - return 0; +out: + if (ret) + ftrace_bug(ret, rec); + return; } -#endif -int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { - unsigned long ip = rec->ip; + unsigned long addr, ip = rec->ip; ppc_inst_t old, new; + int ret = 0; + + /* Verify instructions surrounding the ftrace location */ + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + /* Expect nops */ + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!ret) + ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); + } else if (IS_ENABLED(CONFIG_PPC32)) { + /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ + ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), + ppc_inst(PPC_RAW_NOP())); + } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { + /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ + ret = ftrace_read_inst(ip - 4, &old); + if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + /* Gcc v5.x emit the additional 'std' instruction, gcc v6.x don't */ + ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), + ppc_inst(PPC_RAW_NOP())); + } + } else { + return -EINVAL; + } - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { - /* within range */ - old = ftrace_call_replace(ip, old_addr, 1); - new = ftrace_call_replace(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { + if (ret) + return ret; + + /* Set up out-of-line stub */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ret = ftrace_init_ool_stub(mod, rec); + goto out; + } + + /* Nop-out the ftrace location */ + new = ppc_inst(PPC_RAW_NOP()); + addr = MCOUNT_ADDR; + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + /* we instead patch-in the 'mflr r0' */ + old = ppc_inst(PPC_RAW_NOP()); + new = ppc_inst(PPC_RAW_MFLR(_R0)); + ret = ftrace_modify_code(ip - 4, old, new); + } else if (is_offset_in_branch_range(addr - ip)) { + /* Within range */ + old = ftrace_create_branch_inst(ip, addr, 1); + ret = ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) { /* - * We always patch out of range locations to go to the regs - * variant, so there is nothing to do here + * We would be branching to a linker-generated stub, or to the module _mcount + * stub. Let's just confirm we have a 'bl' here. */ - return 0; - } else if (!IS_ENABLED(CONFIG_MODULES)) { - /* We should not get here without modules */ + ret = ftrace_read_inst(ip, &old); + if (ret) + return ret; + if (!is_bl_op(old)) { + pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old)); + return -EINVAL; + } + ret = patch_instruction((u32 *)ip, new); + } else { return -EINVAL; } - /* - * Out of range jumps are called from modules. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } +out: + if (!ret) + ret = ftrace_rec_set_nop_ops(rec); - return __ftrace_modify_call(rec, old_addr, addr); + return ret; } -#endif int ftrace_update_ftrace_func(ftrace_func_t func) { @@ -681,15 +535,22 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ppc_inst_t old, new; int ret; + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + old = ppc_inst_read((u32 *)&ftrace_call); - new = ftrace_call_replace(ip, (unsigned long)func, 1); + new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); /* Also update the regs callback function */ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) { ip = (unsigned long)(&ftrace_regs_call); old = ppc_inst_read((u32 *)&ftrace_regs_call); - new = ftrace_call_replace(ip, (unsigned long)func, 1); + new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); } @@ -705,11 +566,6 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); } -#ifdef CONFIG_PPC64 -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; - void ftrace_free_init_tramp(void) { int i; @@ -721,84 +577,91 @@ void ftrace_free_init_tramp(void) } } -int __init ftrace_dyn_arch_init(void) +static void __init add_ftrace_tramp(unsigned long tramp) { int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return; + } +} + +int __init ftrace_dyn_arch_init(void) +{ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; + unsigned long addr = FTRACE_REGS_ADDR; + long reladdr; + int i; u32 stub_insns[] = { - PPC_RAW_LD(_R12, _R13, PACATOC), +#ifdef CONFIG_PPC_KERNEL_PCREL + /* pla r12,addr */ + PPC_PREFIX_MLS | __PPC_PRFX_R(1), + PPC_INST_PADDI | ___PPC_RT(_R12), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() +#elif defined(CONFIG_PPC64) + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), PPC_RAW_ADDIS(_R12, _R12, 0), PPC_RAW_ADDI(_R12, _R12, 0), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR() +#else + PPC_RAW_LIS(_R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() +#endif }; - unsigned long addr; - long reladdr; - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - addr = ppc_global_function_entry((void *)ftrace_regs_caller); - else - addr = ppc_global_function_entry((void *)ftrace_caller); + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + for (i = 0; i < 2; i++) { + reladdr = addr - (unsigned long)tramp[i]; + + if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { + pr_err("Address of %ps out of range of pcrel address.\n", + (void *)addr); + return -1; + } - reladdr = addr - kernel_toc_addr(); + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][0] |= IMM_H18(reladdr); + tramp[i][1] |= IMM_L(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + } else if (IS_ENABLED(CONFIG_PPC64)) { + reladdr = addr - kernel_toc_addr(); - if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { - pr_err("Address of %ps out of range of kernel_toc.\n", + if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) { + pr_err("Address of %ps out of range of kernel_toc.\n", (void *)addr); - return -1; - } + return -1; + } - for (i = 0; i < 2; i++) { - memcpy(tramp[i], stub_insns, sizeof(stub_insns)); - tramp[i][1] |= PPC_HA(reladdr); - tramp[i][2] |= PPC_LO(reladdr); - add_ftrace_tramp((unsigned long)tramp[i]); + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + } else { + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][0] |= PPC_HA(addr); + tramp[i][1] |= PPC_LO(addr); + add_ftrace_tramp((unsigned long)tramp[i]); + } } return 0; } -#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER - -extern void ftrace_graph_call(void); -extern void ftrace_graph_stub(void); - -static int ftrace_modify_ftrace_graph_caller(bool enable) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - unsigned long addr = (unsigned long)(&ftrace_graph_caller); - unsigned long stub = (unsigned long)(&ftrace_graph_stub); - ppc_inst_t old, new; - - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) - return 0; - - old = ftrace_call_replace(ip, enable ? stub : addr, 0); - new = ftrace_call_replace(ip, enable ? addr : stub, 0); - - return ftrace_modify_code(ip, old, new); -} - -int ftrace_enable_ftrace_graph_caller(void) -{ - return ftrace_modify_ftrace_graph_caller(true); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return ftrace_modify_ftrace_graph_caller(false); -} - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. Return the address we want to divert to. - */ -static unsigned long -__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long return_hooker; - int bit; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1]; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -806,41 +669,10 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; - bit = ftrace_test_recursion_trylock(ip, parent); - if (bit < 0) - goto out; + if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, fregs)) + parent_ip = ppc_function_entry(return_to_handler); - return_hooker = ppc_function_entry(return_to_handler); - - if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) - parent = return_hooker; - - ftrace_test_recursion_unlock(bit); out: - return parent; + arch_ftrace_regs(fregs)->regs.link = parent_ip; } - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS -void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs) -{ - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); -} -#else -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, - unsigned long sp) -{ - return __prepare_ftrace_return(parent, ip, sp); -} -#endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#ifdef CONFIG_PPC64_ELF_ABI_V1 -char *arch_ftrace_match_adjust(char *str, const char *search) -{ - if (str[0] == '.' && search[0] != '.') - return str + 1; - else - return str; -} -#endif /* CONFIG_PPC64_ELF_ABI_V1 */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S deleted file mode 100644 index 6708e24db0ab..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from ftrace_64.S - */ - -#include <linux/magic.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> -#include <asm/ppc-opcode.h> -#include <asm/export.h> - -_GLOBAL_TOC(ftrace_caller) - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beqlr - - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - subi r3, r3, MCOUNT_INSN_SIZE -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - addi r5, r1, 112 - /* load r4 with local address */ - ld r4, 128(r1) - subi r4, r4, MCOUNT_INSN_SIZE - - /* Grab the LR out of the caller stack frame */ - ld r11, 112(r1) - ld r3, 16(r11) - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - ld r11, 112(r1) - std r3, 16(r11) - - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c new file mode 100644 index 000000000000..5c6e545d1708 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -0,0 +1,832 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> + * + * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box. + * + * Added function graph tracer code, taken from x86 that was written + * by Frederic Weisbecker, and ported to PPC by Steven Rostedt. + * + */ + +#define pr_fmt(fmt) "ftrace-powerpc: " fmt + +#include <linux/spinlock.h> +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/ftrace.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/list.h> + +#include <asm/cacheflush.h> +#include <asm/text-patching.h> +#include <asm/ftrace.h> +#include <asm/syscall.h> +#include <asm/inst.h> + +/* + * We generally only have a single long_branch tramp and at most 2 or 3 plt + * tramps generated. But, we don't use the plt tramps currently. We also allot + * 2 tramps after .text and .init.text. So, we only end up with around 3 usable + * tramps in total. Set aside 8 just to be sure. + */ +#define NUM_FTRACE_TRAMPS 8 +static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; + +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +static ppc_inst_t +ftrace_call_replace(unsigned long ip, unsigned long addr, int link) +{ + ppc_inst_t op; + + addr = ppc_function_entry((void *)addr); + + /* if (link) set op to 'bl' else 'b' */ + create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0); + + return op; +} + +static inline int +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +{ + ppc_inst_t replaced; + + /* + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. + */ + + /* read the text we want to modify */ + if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (!ppc_inst_equal(replaced, old)) { + pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, + ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); + return -EINVAL; + } + + /* replace the text with the new text */ + return patch_instruction((u32 *)ip, new); +} + +/* + * Helper functions that are the same for both PPC64 and PPC32. + */ +static int test_24bit_addr(unsigned long ip, unsigned long addr) +{ + addr = ppc_function_entry((void *)addr); + + return is_offset_in_branch_range(addr - ip); +} + +static int is_bl_op(ppc_inst_t op) +{ + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); +} + +static int is_b_op(ppc_inst_t op) +{ + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0); +} + +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) +{ + int offset; + + offset = PPC_LI(ppc_inst_val(op)); + /* make it signed */ + if (offset & 0x02000000) + offset |= 0xfe000000; + + return ip + (long)offset; +} + +#ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + scoped_guard(rcu) + mod = __module_text_address(rec->ip); + if (!mod) + pr_err("No module loaded at addr=%lx\n", rec->ip); + + return mod; +} + +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long entry, ptr, tramp; + unsigned long ip = rec->ip; + ppc_inst_t op, pop; + + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && + !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { + pr_err("Unexpected instruction %08lx around bl _mcount\n", + ppc_inst_as_ulong(op)); + return -EINVAL; + } + } else if (IS_ENABLED(CONFIG_PPC64)) { + /* + * Check what is in the next instruction. We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { + pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, + ppc_inst_as_ulong(op)); + return -EINVAL; + } + } + + /* + * When using -mprofile-kernel or PPC32 there is no load to jump over. + * + * Otherwise our original call site looks like: + * + * bl <tramp> + * ld r2,XX(r1) + * + * Milton Miller pointed out that we can not simply nop the branch. + * If a task was preempted when calling a trace function, the nops + * will remove the way to restore the TOC in r2 and the r2 TOC will + * get corrupted. + * + * Use a b +8 to jump over the load. + */ + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) + pop = ppc_inst(PPC_RAW_NOP()); + else + pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */ + + if (patch_instruction((u32 *)ip, pop)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + + return 0; +} +#else +static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +static unsigned long find_ftrace_tramp(unsigned long ip) +{ + int i; + + /* + * We have the compiler generated long_branch tramps at the end + * and we prefer those + */ + for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + if (!ftrace_tramps[i]) + continue; + else if (is_offset_in_branch_range(ftrace_tramps[i] - ip)) + return ftrace_tramps[i]; + + return 0; +} + +static int add_ftrace_tramp(unsigned long tramp) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return 0; + } + + return -1; +} + +/* + * If this is a compiler generated long_branch trampoline (essentially, a + * trampoline that has a branch to _mcount()), we re-write the branch to + * instead go to ftrace_[regs_]caller() and note down the location of this + * trampoline. + */ +static int setup_mcount_compiler_tramp(unsigned long tramp) +{ + int i; + ppc_inst_t op; + unsigned long ptr; + + /* Is this a known long jump tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (ftrace_tramps[i] == tramp) + return 0; + + /* New trampoline -- read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { + pr_debug("Fetching opcode failed.\n"); + return -1; + } + + /* Is this a 24 bit branch? */ + if (!is_b_op(op)) { + pr_debug("Trampoline is not a long branch tramp.\n"); + return -1; + } + + /* lets find where the pointer goes */ + ptr = find_bl_target(tramp, op); + + if (ptr != ppc_global_function_entry((void *)_mcount)) { + pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); + return -1; + } + + /* Let's re-write the tramp to go to ftrace_[regs_]caller */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + ptr = ppc_global_function_entry((void *)ftrace_regs_caller); + else + ptr = ppc_global_function_entry((void *)ftrace_caller); + + if (patch_branch((u32 *)tramp, ptr, 0)) { + pr_debug("REL24 out of range!\n"); + return -1; + } + + if (add_ftrace_tramp(tramp)) { + pr_debug("No tramp locations left\n"); + return -1; + } + + return 0; +} + +static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long tramp, ip = rec->ip; + ppc_inst_t op; + + /* Read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* Let's find where the pointer goes */ + tramp = find_bl_target(ip, op); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (setup_mcount_compiler_tramp(tramp)) { + /* Are other trampolines reachable? */ + if (!find_ftrace_tramp(ip)) { + pr_err("No ftrace trampolines reachable from %ps\n", + (void *)ip); + return -EINVAL; + } + } + + if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + + return 0; +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ftrace_call_replace(ip, addr, 1); + new = ppc_inst(PPC_RAW_NOP()); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + return __ftrace_make_nop_kernel(rec, addr); + } else if (!IS_ENABLED(CONFIG_MODULES)) { + return -EINVAL; + } + + return __ftrace_make_nop(mod, rec, addr); +} + +#ifdef CONFIG_MODULES +/* + * Examine the existing instructions for __ftrace_make_call. + * They should effectively be a NOP, and follow formal constraints, + * depending on the ABI. Return false if they don't. + */ +static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) +{ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); + else + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && + ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); +} + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + ppc_inst_t op[2]; + void *ip = (void *)rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(op, ip)) + return -EFAULT; + + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + copy_inst_from_kernel_nofault(op + 1, ip + 4)) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, + ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); + return -EINVAL; + } + + /* If we never set up ftrace trampoline(s), then bail */ + if (!mod->arch.tramp || + (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#else +static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) +{ + ppc_inst_t op; + void *ip = (void *)rec->ip; + unsigned long tramp, entry, ptr; + + /* Make sure we're being asked to patch branch to a known ftrace addr */ + entry = ppc_global_function_entry((void *)ftrace_caller); + ptr = ppc_global_function_entry((void *)addr); + + if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + entry = ppc_global_function_entry((void *)ftrace_regs_caller); + + if (ptr != entry) { + pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); + return -EINVAL; + } + + /* Make sure we have a nop */ + if (copy_inst_from_kernel_nofault(&op, ip)) { + pr_err("Unable to read ftrace location %p\n", ip); + return -EFAULT; + } + + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { + pr_err("Unexpected call sequence at %p: %08lx\n", + ip, ppc_inst_as_ulong(op)); + return -EINVAL; + } + + tramp = find_ftrace_tramp((unsigned long)ip); + if (!tramp) { + pr_err("No ftrace trampolines reachable from %ps\n", ip); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("Error patching branch to ftrace tramp!\n"); + return -EINVAL; + } + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ppc_inst(PPC_RAW_NOP()); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + return __ftrace_make_call_kernel(rec, addr); + } else if (!IS_ENABLED(CONFIG_MODULES)) { + /* We should not get here without modules */ + return -EINVAL; + } + + return __ftrace_make_call(rec, addr); +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_MODULES +static int +__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + ppc_inst_t op; + unsigned long ip = rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; + + /* If we never set up ftrace trampolines, then bail */ + if (!mod->arch.tramp || !mod->arch.tramp_regs) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + entry = ppc_global_function_entry((void *)old_addr); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (tramp != entry) { + /* old_addr is not within range, so we must have used a trampoline */ + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + } + + /* The new target may be within range */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; + } + + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#else +static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +{ + return 0; +} +#endif + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { + /* within range */ + old = ftrace_call_replace(ip, old_addr, 1); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + /* + * We always patch out of range locations to go to the regs + * variant, so there is nothing to do here + */ + return 0; + } else if (!IS_ENABLED(CONFIG_MODULES)) { + /* We should not get here without modules */ + return -EINVAL; + } + + return __ftrace_modify_call(rec, old_addr, addr); +} +#endif + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + ppc_inst_t old, new; + int ret; + + old = ppc_inst_read((u32 *)&ftrace_call); + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + + /* Also update the regs callback function */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) { + ip = (unsigned long)(&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + } + + return ret; +} + +/* + * Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + +#ifdef CONFIG_PPC64 +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; + +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + +int __init ftrace_dyn_arch_init(void) +{ + int i; + unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; + u32 stub_insns[] = { + PPC_RAW_LD(_R12, _R13, PACATOC), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() + }; + unsigned long addr; + long reladdr; + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + addr = ppc_global_function_entry((void *)ftrace_regs_caller); + else + addr = ppc_global_function_entry((void *)ftrace_caller); + + reladdr = addr - kernel_toc_addr(); + + if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { + pr_err("Address of %ps out of range of kernel_toc.\n", + (void *)addr); + return -1; + } + + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + + return 0; +} +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +extern void ftrace_graph_call(void); +extern void ftrace_graph_stub(void); + +static int ftrace_modify_ftrace_graph_caller(bool enable) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + unsigned long addr = (unsigned long)(&ftrace_graph_caller); + unsigned long stub = (unsigned long)(&ftrace_graph_stub); + ppc_inst_t old, new; + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + + old = ftrace_call_replace(ip, enable ? stub : addr, 0); + new = ftrace_call_replace(ip, enable ? addr : stub, 0); + + return ftrace_modify_code(ip, old, new); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(false); +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. Return the address we want to divert to. + */ +static unsigned long +__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp, + struct ftrace_regs *fregs) +{ + unsigned long return_hooker; + + if (unlikely(ftrace_graph_is_dead())) + goto out; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + goto out; + + return_hooker = ppc_function_entry(return_to_handler); + + if (!function_graph_enter_regs(parent, ip, 0, (unsigned long *)sp, fregs)) + parent = return_hooker; + +out: + return parent; +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, + arch_ftrace_regs(fregs)->regs.gpr[1], fregs); +} +#else +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp) +{ + return __prepare_ftrace_return(parent, ip, sp, NULL); +} +#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_PPC64_ELF_ABI_V1 +char *arch_ftrace_match_adjust(char *str, const char *search) +{ + if (str[0] == '.' && search[0] != '.') + return str + 1; + else + return str; +} +#endif /* CONFIG_PPC64_ELF_ABI_V1 */ diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S index 294d1e05958a..a8a7f28404c8 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S @@ -1,28 +1,82 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Split from entry_64.S + * Split from ftrace_64.S */ +#include <linux/export.h> #include <linux/magic.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ftrace.h> #include <asm/ppc-opcode.h> -#include <asm/export.h> -#ifdef CONFIG_PPC64 +_GLOBAL_TOC(ftrace_caller) + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beqlr + + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + addi r5, r1, 112 + /* load r4 with local address */ + ld r4, 128(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* Grab the LR out of the caller stack frame */ + ld r11, 112(r1) + ld r3, 16(r11) + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + ld r11, 112(r1) + std r3, 16(r11) + + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: - .space 64 + .space 32 .popsection .pushsection ".tramp.ftrace.init","aw",@progbits; .globl ftrace_tramp_init ftrace_tramp_init: - .space 64 + .space 32 .popsection -#endif _GLOBAL(mcount) _GLOBAL(_mcount) diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S new file mode 100644 index 000000000000..6599fe3c6234 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -0,0 +1,479 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Split from ftrace_64.S + */ + +#include <linux/export.h> +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/thread_info.h> +#include <asm/bug.h> +#include <asm/ptrace.h> + +/* + * + * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() + * when ftrace is active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +.macro ftrace_regs_entry allregs + /* Create a minimal stack frame for representing B */ + PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) + + /* Create our stack frame + pt_regs */ + PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) + + .if \allregs == 1 + SAVE_GPRS(11, 12, r1) + .endif + + /* Get the _mcount() call site out of LR */ + mflr r11 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Load the ftrace_op */ + PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + + /* Load direct_call from the ftrace_op */ + PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12) + PPC_LCMPI r12, 0 + .if \allregs == 1 + bne .Lftrace_direct_call_regs + .else + bne .Lftrace_direct_call + .endif +#endif + + /* Save the previous LR in pt_regs->link */ + PPC_STL r0, _LINK(r1) + /* Also save it in A's stack frame */ + PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1) + + /* Save all gprs to pt_regs */ + SAVE_GPR(0, r1) + SAVE_GPRS(3, 10, r1) + +#ifdef CONFIG_PPC64 + /* Ok to continue? */ + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beq ftrace_no_trace +#endif + + .if \allregs == 1 + SAVE_GPR(2, r1) + SAVE_GPRS(13, 31, r1) + .else +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + SAVE_GPR(14, r1) +#endif + .endif + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + PPC_STL r8, GPR1(r1) + + .if \allregs == 1 + /* Load special regs for save below */ + mfcr r7 + mfmsr r8 + mfctr r9 + mfxer r10 + .else + /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ + li r8, 0 + .endif + +#ifdef CONFIG_PPC64 + /* Save callee's TOC in the ABI compliant location */ + std r2, STK_GOT(r1) + LOAD_PACA_TOC() /* get kernel TOC in r2 */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* r11 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + PPC_LL r12, FTRACE_OPS_FUNC(r5) + mtctr r12 +#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ +#ifdef CONFIG_PPC64 + LOAD_REG_ADDR(r3, function_trace_op) + ld r5,0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif +#endif + + /* Save special regs */ + PPC_STL r8, _MSR(r1) + .if \allregs == 1 + PPC_STL r7, _CCR(r1) + PPC_STL r9, _CTR(r1) + PPC_STL r10, _XER(r1) + .endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Clear orig_gpr3 to later detect ftrace_direct call */ + li r7, 0 + PPC_STL r7, ORIG_GPR3(r1) +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Save our real return address in nvr for return */ + .if \allregs == 0 + SAVE_GPR(15, r1) + .endif + mr r15, r11 + /* + * We want the ftrace location in the function, but our lr (in r11) + * points at the 'mtlr r0' instruction in the out of line stub. To + * recover the ftrace location, we read the branch instruction in the + * stub, and adjust our lr by the branch offset. + * + * See ftrace_init_ool_stub() for the profile sequence. + */ + lwz r8, MCOUNT_INSN_SIZE(r11) + slwi r8, r8, 6 + srawi r8, r8, 6 + add r3, r11, r8 + /* + * Override our nip to point past the branch in the original function. + * This allows reliable stack trace and the ftrace stack tracer to work as-is. + */ + addi r11, r3, MCOUNT_INSN_SIZE +#else + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r11, MCOUNT_INSN_SIZE +#endif + + /* Save NIP as pt_regs->nip */ + PPC_STL r11, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + mr r14, r11 /* remember old NIP */ +#endif + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Load &pt_regs in r6 for call below */ + addi r6, r1, STACK_INT_FRAME_REGS +.endm + +.macro ftrace_regs_exit allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Check orig_gpr3 to detect ftrace_direct call */ + PPC_LL r3, ORIG_GPR3(r1) + PPC_LCMPI cr1, r3, 0 + mtctr r3 +#endif + + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Load ctr with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) +#ifdef CONFIG_LIVEPATCH_64 + cmpd r14, r3 /* has NIP been altered? */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + beq cr1,2f + mtlr r3 + b 3f +#endif +2: mtctr r3 + mtlr r0 +3: + +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + /* Load LR with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + cmpd r14, r3 /* has NIP been altered? */ + bne- 1f + + mr r3, r15 +1: mtlr r3 + .if \allregs == 0 + REST_GPR(15, r1) + .endif +#endif + + /* Restore gprs */ + .if \allregs == 1 + REST_GPRS(2, 31, r1) + .else + REST_GPRS(3, 10, r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + REST_GPR(14, r1) +#endif + .endif + +#ifdef CONFIG_PPC64 + /* Restore callee's TOC */ + ld r2, STK_GOT(r1) +#endif + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + +#ifdef CONFIG_LIVEPATCH_64 + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif + + /* jump after _mcount site */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnectr cr1 +#endif + /* + * Return with blr to keep the link stack balanced. The function profiling sequence + * uses 'mtlr r0' to restore LR. + */ + blr +#else + bctr +#endif +.endm + +.macro ftrace_regs_func allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + bctrl +#else + .if \allregs == 1 +.globl ftrace_regs_call +ftrace_regs_call: + .else +.globl ftrace_call +ftrace_call: + .endif + /* ftrace_call(r3, r4, r5, r6) */ + bl ftrace_stub +#endif +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + ftrace_regs_func 1 + ftrace_regs_exit 1 + +_GLOBAL(ftrace_caller) + ftrace_regs_entry 0 + ftrace_regs_func 0 + ftrace_regs_exit 0 + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_PPC64 +ftrace_no_trace: +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + blr +#else + mflr r3 + mtctr r3 + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + mtlr r0 + bctr +#endif +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +.Lftrace_direct_call_regs: + mtctr r12 + REST_GPRS(11, 12, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +.Lftrace_direct_call: + mtctr r12 + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +SYM_FUNC_START(ftrace_stub_direct_tramp) + blr +SYM_FUNC_END(ftrace_stub_direct_tramp) +#endif + +#ifdef CONFIG_LIVEPATCH_64 + /* + * This function runs in the mcount context, between two functions. As + * such it can only clobber registers which are volatile and used in + * function linkage. + * + * We get here when a function A, calls another function B, but B has + * been live patched with a new function C. + * + * On entry, we have no stack frame and can not allocate one. + * + * With PPC_FTRACE_OUT_OF_LINE=n, on entry: + * - LR points back to the original caller (in A) + * - CTR holds the new NIP in C + * - r0, r11 & r12 are free + * + * With PPC_FTRACE_OUT_OF_LINE=y, on entry: + * - r0 points back to the original caller (in A) + * - LR holds the new NIP in C + * - r11 & r12 are free + */ +livepatch_handler: + ld r12, PACA_THREAD_INFO(r13) + + /* Allocate 3 x 8 bytes */ + ld r11, TI_livepatch_sp(r12) + addi r11, r11, 24 + std r11, TI_livepatch_sp(r12) + + /* Store stack end marker */ + lis r12, STACK_END_MAGIC@h + ori r12, r12, STACK_END_MAGIC@l + std r12, -8(r11) + + /* Save toc & real LR on livepatch stack */ + std r2, -24(r11) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + mflr r12 + std r12, -16(r11) + mfctr r12 +#else + std r0, -16(r11) + mflr r12 + /* Put ctr in r12 for global entry and branch there */ + mtctr r12 +#endif + bctrl + + /* + * Now we are returning from the patched function to the original + * caller A. We are free to use r11, r12 and we can use r2 until we + * restore it. + */ + + ld r12, PACA_THREAD_INFO(r13) + + ld r11, TI_livepatch_sp(r12) + + /* Check stack marker hasn't been trashed */ + lis r2, STACK_END_MAGIC@h + ori r2, r2, STACK_END_MAGIC@l + ld r12, -8(r11) +1: tdne r12, r2 + EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 + + /* Restore LR & toc from livepatch stack */ + ld r12, -16(r11) + mtlr r12 + ld r2, -24(r11) + + /* Pop livepatch stack frame */ + ld r12, PACA_THREAD_INFO(r13) + subi r11, r11, 24 + std r11, TI_livepatch_sp(r12) + + /* Return to original caller of live patched function */ + blr +#endif /* CONFIG_LIVEPATCH */ + +#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY +_GLOBAL(mcount) +_GLOBAL(_mcount) +EXPORT_SYMBOL(_mcount) + mflr r12 + mtctr r12 + mtlr r0 + bctr +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(return_to_handler) + /* need to save return values */ +#ifdef CONFIG_PPC64 + stdu r1, -SWITCH_FRAME_SIZE(r1) + std r4, GPR4(r1) + std r3, GPR3(r1) + /* Save previous stack pointer (r1) */ + addi r3, r1, SWITCH_FRAME_SIZE + std r3, GPR1(r1) + /* save TOC */ + std r2, 24(r1) + std r31, 32(r1) + mr r31, r1 + /* pass ftrace_regs/pt_regs to ftrace_return_to_handler */ + addi r3, r1, STACK_INT_FRAME_REGS + /* + * We might be called from a module. + * Switch to our TOC to run inside the core kernel. + */ + LOAD_PACA_TOC() +#else + stwu r1, -SWITCH_FRAME_SIZE(r1) + stw r4, GPR4(r1) + stw r3, GPR3(r1) + addi r3, r1, SWITCH_FRAME_SIZE + stw r3, GPR1(r1) + /* pass ftrace_regs/pt_regs to ftrace_return_to_handler */ + addi r3, r1, STACK_INT_FRAME_REGS +#endif + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + +#ifdef CONFIG_PPC64 + ld r4, GPR4(r1) + ld r3, GPR3(r1) + ld r2, 24(r1) + ld r31, 32(r1) + ld r1, 0(r1) +#else + lwz r3, GPR3(r1) + lwz r4, GPR4(r1) + addi r1, r1, SWITCH_FRAME_SIZE +#endif + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) + +SYM_START(ftrace_ool_stub_text, SYM_L_GLOBAL, .balign SZL) + .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text) +#endif + +.pushsection ".tramp.ftrace.text","aw",@progbits; +.globl ftrace_tramp_text +ftrace_tramp_text: + .space 32 +.popsection + +.pushsection ".tramp.ftrace.init","aw",@progbits; +.globl ftrace_tramp_init +ftrace_tramp_init: + .space 32 +.popsection diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S deleted file mode 100644 index ffb1db386849..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ /dev/null @@ -1,251 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from ftrace_64.S - */ - -#include <linux/magic.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> -#include <asm/ppc-opcode.h> -#include <asm/export.h> -#include <asm/thread_info.h> -#include <asm/bug.h> -#include <asm/ptrace.h> - -/* - * - * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() - * when ftrace is active. - * - * We arrive here after a function A calls function B, and we are the trace - * function for B. When we enter r1 points to A's stack frame, B has not yet - * had a chance to allocate one yet. - * - * Additionally r2 may point either to the TOC for A, or B, depending on - * whether B did a TOC setup sequence before calling us. - * - * On entry the LR points back to the _mcount() call site, and r0 holds the - * saved LR as it was on entry to B, ie. the original return address at the - * call site in A. - * - * Our job is to save the register state into a struct pt_regs (on the stack) - * and then arrange for the ftrace function to be called. - */ -.macro ftrace_regs_entry allregs - /* Create our stack frame + pt_regs */ - PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_GPR(0, r1) - SAVE_GPRS(3, 10, r1) - -#ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) - /* Ok to continue? */ - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beq ftrace_no_trace -#endif - - .if \allregs == 1 - SAVE_GPR(2, r1) - SAVE_GPRS(11, 31, r1) - .else -#ifdef CONFIG_LIVEPATCH_64 - SAVE_GPR(14, r1) -#endif - .endif - - /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE - PPC_STL r8, GPR1(r1) - - .if \allregs == 1 - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - .else - /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ - li r8, 0 - .endif - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - PPC_STL r7, _NIP(r1) - /* Save the read LR in pt_regs->link */ - PPC_STL r0, _LINK(r1) - -#ifdef CONFIG_PPC64 - /* Save callee's TOC in the ABI compliant location */ - std r2, STK_GOT(r1) - LOAD_PACA_TOC() /* get kernel TOC in r2 */ - LOAD_REG_ADDR(r3, function_trace_op) - ld r5,0(r3) -#else - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) -#endif - -#ifdef CONFIG_LIVEPATCH_64 - mr r14, r7 /* remember old NIP */ -#endif - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - PPC_STL r8, _MSR(r1) - .if \allregs == 1 - PPC_STL r9, _CTR(r1) - PPC_STL r10, _XER(r1) - PPC_STL r11, _CCR(r1) - .endif - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_INT_FRAME_REGS -.endm - -.macro ftrace_regs_exit allregs - /* Load ctr with the possibly modified NIP */ - PPC_LL r3, _NIP(r1) - mtctr r3 - -#ifdef CONFIG_LIVEPATCH_64 - cmpd r14, r3 /* has NIP been altered? */ -#endif - - /* Restore gprs */ - .if \allregs == 1 - REST_GPRS(2, 31, r1) - .else - REST_GPRS(3, 10, r1) -#ifdef CONFIG_LIVEPATCH_64 - REST_GPR(14, r1) -#endif - .endif - - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) - mtlr r0 - -#ifdef CONFIG_PPC64 - /* Restore callee's TOC */ - ld r2, STK_GOT(r1) -#endif - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - -#ifdef CONFIG_LIVEPATCH_64 - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - bctr /* jump after _mcount site */ -.endm - -_GLOBAL(ftrace_regs_caller) - ftrace_regs_entry 1 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - ftrace_regs_exit 1 - -_GLOBAL(ftrace_caller) - ftrace_regs_entry 0 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - ftrace_regs_exit 0 - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_PPC64 -ftrace_no_trace: - mflr r3 - mtctr r3 - REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE - mtlr r0 - bctr -#endif - -#ifdef CONFIG_LIVEPATCH_64 - /* - * This function runs in the mcount context, between two functions. As - * such it can only clobber registers which are volatile and used in - * function linkage. - * - * We get here when a function A, calls another function B, but B has - * been live patched with a new function C. - * - * On entry: - * - we have no stack frame and can not allocate one - * - LR points back to the original caller (in A) - * - CTR holds the new NIP in C - * - r0, r11 & r12 are free - */ -livepatch_handler: - ld r12, PACA_THREAD_INFO(r13) - - /* Allocate 3 x 8 bytes */ - ld r11, TI_livepatch_sp(r12) - addi r11, r11, 24 - std r11, TI_livepatch_sp(r12) - - /* Save toc & real LR on livepatch stack */ - std r2, -24(r11) - mflr r12 - std r12, -16(r11) - - /* Store stack end marker */ - lis r12, STACK_END_MAGIC@h - ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r11) - - /* Put ctr in r12 for global entry and branch there */ - mfctr r12 - bctrl - - /* - * Now we are returning from the patched function to the original - * caller A. We are free to use r11, r12 and we can use r2 until we - * restore it. - */ - - ld r12, PACA_THREAD_INFO(r13) - - ld r11, TI_livepatch_sp(r12) - - /* Check stack marker hasn't been trashed */ - lis r2, STACK_END_MAGIC@h - ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r11) -1: tdne r12, r2 - EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 - - /* Restore LR & toc from livepatch stack */ - ld r12, -16(r11) - mtlr r12 - ld r2, -24(r11) - - /* Pop livepatch stack frame */ - ld r12, PACA_THREAD_INFO(r13) - subi r11, r11, 24 - std r11, TI_livepatch_sp(r12) - - /* Return to original caller of live patched function */ - blr -#endif /* CONFIG_LIVEPATCH */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9bdd79aa51cf..cb8e9357383e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -121,7 +121,7 @@ static void pmac_backlight_unblank(void) props = &pmac_backlight->props; props->brightness = props->max_brightness; - props->power = FB_BLANK_UNBLANK; + props->power = BACKLIGHT_POWER_ON; backlight_update_status(pmac_backlight); } mutex_unlock(&pmac_backlight_mutex); @@ -157,7 +157,7 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; -extern void panic_flush_kmsg_start(void) +void panic_flush_kmsg_start(void) { /* * These are mostly taken from kernel/panic.c, but tries to do @@ -170,7 +170,7 @@ extern void panic_flush_kmsg_start(void) bust_spinlocks(1); } -extern void panic_flush_kmsg_end(void) +void panic_flush_kmsg_end(void) { kmsg_dump(KMSG_DUMP_PANIC); bust_spinlocks(0); @@ -263,10 +263,9 @@ static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s %s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", PAGE_SIZE / 1024, get_mmu_str(), - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", @@ -404,7 +403,7 @@ noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; if (!(regs->msr & MSR_HV)) return; - if (regs->msr & MSR_PR) + if (user_mode(regs)) return; /* @@ -1158,12 +1157,13 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) * pretend we got a single-step exception. This was pointed out * by Kumar Gala. -- paulus */ -static void emulate_single_step(struct pt_regs *regs) +void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) __single_step_exception(regs); } +#ifdef CONFIG_PPC_FPU_REGS static inline int __parse_fpscr(unsigned long fpscr) { int ret = FPE_FLTUNK; @@ -1190,6 +1190,7 @@ static inline int __parse_fpscr(unsigned long fpscr) return ret; } +#endif static void parse_fpe(struct pt_regs *regs) { @@ -1437,10 +1438,12 @@ static int emulate_instruction(struct pt_regs *regs) return -EINVAL; } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return is_kernel_addr(addr); } +#endif #ifdef CONFIG_MATH_EMULATION static int emulate_math(struct pt_regs *regs) @@ -1506,18 +1509,17 @@ static void do_program_check(struct pt_regs *regs) if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR)) bugaddr += PAGE_OFFSET; - if (!(regs->msr & MSR_PR) && /* not user-mode */ + if (!user_mode(regs) && report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - const struct exception_table_entry *entry; + regs_add_return_ip(regs, 4); + return; + } - entry = search_exception_tables(bugaddr); - if (entry) { - regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr); - return; - } + /* User mode considers other cases after enabling IRQs */ + if (!user_mode(regs)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; } - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1550,16 +1552,44 @@ static void do_program_check(struct pt_regs *regs) /* * If we took the program check in the kernel skip down to sending a - * SIGILL. The subsequent cases all relate to emulating instructions - * which we should only do for userspace. We also do not want to enable - * interrupts for kernel faults because that might lead to further - * faults, and loose the context of the original exception. + * SIGILL. The subsequent cases all relate to user space, such as + * emulating instructions which we should only do for user space. We + * also do not want to enable interrupts for kernel faults because that + * might lead to further faults, and loose the context of the original + * exception. */ if (!user_mode(regs)) goto sigill; interrupt_cond_local_irq_enable(regs); + /* + * (reason & REASON_TRAP) is mostly handled before enabling IRQs, + * except get_user_instr() can sleep so we cannot reliably inspect the + * current instruction in that context. Now that we know we are + * handling a user space trap and can sleep, we can check if the trap + * was a hashchk failure. + */ + if (reason & REASON_TRAP) { + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + ppc_inst_t insn; + + if (get_user_instr(insn, (void __user *)regs->nip)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + + if (ppc_inst_primary_opcode(insn) == 31 && + get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } + } + + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to @@ -2213,22 +2243,11 @@ void __noreturn unrecoverable_exception(struct pt_regs *regs) ; } -#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) -/* - * Default handler for a Watchdog exception, - * spins until a reboot occurs - */ -void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) -{ - /* Generic WatchdogHandler, implement your own */ - mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); - return; -} - +#ifdef CONFIG_BOOKE_WDT DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); - WatchdogHandler(regs); + mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE); return 0; } #endif diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S index 07296bc39166..80a1f9a4300a 100644 --- a/arch/powerpc/kernel/ucall.S +++ b/arch/powerpc/kernel/ucall.S @@ -5,8 +5,8 @@ * Copyright 2019, IBM Corporation. * */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> _GLOBAL(ucall_norets) EXPORT_SYMBOL_GPL(ucall_norets) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 92b3fc258d11..862b22b2b616 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -36,12 +36,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL) /* RTAS panel debug */ udbg_init_rtas_panel(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) - /* RTAS console debug */ - udbg_init_rtas_console(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) - /* Maple real mode debug */ - udbg_init_maple_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) @@ -49,9 +43,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) /* PPC44x debug */ udbg_init_44x_as1(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_40x) - /* PPC40x debug */ - udbg_init_40x_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_CPM) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 74ddf836f7a2..dfe8ed2192e8 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include <asm/udbg.h> #include <asm/io.h> -#include <asm/reg_a2.h> #include <asm/early_ioremap.h> extern u8 real_readb(volatile u8 __iomem *addr); @@ -206,29 +205,6 @@ void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) udbg_use_uart(); } -#ifdef CONFIG_PPC_MAPLE - -#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) - -static u8 udbg_uart_in_maple(unsigned int reg) -{ - return real_readb(UDBG_UART_MAPLE_ADDR + reg); -} - -static void udbg_uart_out_maple(unsigned int reg, u8 val) -{ - real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); -} - -void __init udbg_init_maple_realmode(void) -{ - udbg_uart_in = udbg_uart_in_maple; - udbg_uart_out = udbg_uart_out_maple; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_MAPLE */ - #ifdef CONFIG_PPC_PASEMI #define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) @@ -275,29 +251,6 @@ void __init udbg_init_44x_as1(void) #endif /* CONFIG_PPC_EARLY_DEBUG_44x */ -#ifdef CONFIG_PPC_EARLY_DEBUG_40x - -static u8 udbg_uart_in_40x(unsigned int reg) -{ - return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR - + reg); -} - -static void udbg_uart_out_40x(unsigned int reg, u8 val) -{ - real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR - + reg); -} - -void __init udbg_init_40x_realmode(void) -{ - udbg_uart_in = udbg_uart_in_40x; - udbg_uart_out = udbg_uart_out_40x; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - #ifdef CONFIG_PPC_EARLY_DEBUG_16550 static void __iomem *udbg_uart_early_addr; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 507f8228f983..ab7c4cc80943 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -16,12 +16,12 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> -#include <linux/memblock.h> #include <linux/syscalls.h> -#include <linux/time_namespace.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/syscall.h> +#include <asm/syscalls.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -33,31 +33,14 @@ #include <asm/vdso_datapage.h> #include <asm/setup.h> +static_assert(__VDSO_PAGES == VDSO_NR_PAGES); + /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; -long sys_ni_syscall(void); - -/* - * The vdso data page (aka. systemcfg for old ppc64 fans) is here. - * Once the early boot kernel code no longer needs to muck around - * with it, it will become dynamically allocated - */ -static union { - struct vdso_arch_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_arch_data *vdso_data = &vdso_data_store.data; - -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { @@ -81,90 +64,33 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf); +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; -static struct vm_special_mapping vvar_spec __ro_after_init = { - .name = "[vvar]", - .fault = vvar_fault, -}; + /* + * close() is called for munmap() but also for mremap(). In the mremap() + * case the vdso pointer has already been updated by the mremap() hook + * above, so it must not be set to NULL here. + */ + if (vma->vm_start != (unsigned long)mm->context.vdso) + return; + + mm->context.vdso = NULL; +} static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, + .close = vdso_close, }; static struct vm_special_mapping vdso64_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso64_mremap, + .close = vdso_close, }; -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return ((struct vdso_arch_data *)vvar_page)->data; -} - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - unsigned long size = vma->vm_end - vma->vm_start; - - if (vma_is_special_mapping(vma, &vvar_spec)) - zap_page_range(vma, vma->vm_start, size); - } - mmap_read_unlock(mm); - - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = virt_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -173,7 +99,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int { unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; - unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; + unsigned long vvar_size = VDSO_NR_PAGES * PAGE_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -199,16 +125,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); - /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO. - */ - mm->context.vdso = (void __user *)vdso_base + vvar_size; - - vma = _install_special_mapping(mm, vdso_base, vvar_size, - VM_READ | VM_MAYREAD | VM_IO | - VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + vma = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -225,10 +142,15 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, vdso_spec); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { do_munmap(mm, vdso_base, vvar_size, NULL); + return PTR_ERR(vma); + } + + // Now that the mappings are in place, set the mm VDSO pointer + mm->context.vdso = (void __user *)vdso_base + vvar_size; - return PTR_ERR_OR_ZERO(vma); + return 0; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -242,8 +164,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return -EINTR; rc = __arch_setup_additional_pages(bprm, uses_interp); - if (rc) - mm->context.vdso = NULL; mmap_write_unlock(mm); return rc; @@ -285,10 +205,10 @@ static void __init vdso_setup_syscall_map(void) for (i = 0; i < NR_syscalls; i++) { if (sys_call_table[i] != (void *)&sys_ni_syscall) - vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + vdso_k_arch_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && compat_sys_call_table[i] != (void *)&sys_ni_syscall) - vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + vdso_k_arch_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -338,29 +258,10 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) static int __init vdso_init(void) { #ifdef CONFIG_PPC64 - /* - * Fill up the "systemcfg" stuff for backward compatibility - */ - strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); - vdso_data->version.major = SYSTEMCFG_MAJOR; - vdso_data->version.minor = SYSTEMCFG_MINOR; - vdso_data->processor = mfspr(SPRN_PVR); - /* - * Fake the old platform number for pSeries and add - * in LPAR bit if necessary - */ - vdso_data->platform = 0x100; - if (firmware_has_feature(FW_FEATURE_LPAR)) - vdso_data->platform |= 1; - vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.l1d.size; - vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; - vdso_data->icache_size = ppc64_caches.l1i.size; - vdso_data->icache_line_size = ppc64_caches.l1i.line_size; - vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; - vdso_data->icache_block_size = ppc64_caches.l1i.block_size; - vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; - vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; + vdso_k_arch_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_k_arch_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_k_arch_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_k_arch_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; #endif /* CONFIG_PPC64 */ vdso_setup_syscall_map(); diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 6a977b0d8ffc..8834dfe9d727 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -2,31 +2,27 @@ # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile +# Include the generic Makefile to check the built vdso. +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o +obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o +obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o + ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) - CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc - CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) # Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true # by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is # compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code # generation is minimal, it will just use r29 instead. - CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) $(call cc-option, -ffixed-r30) +endif + +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y) + CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) endif # Build rules @@ -37,51 +33,74 @@ else VDSOCC := $(CC) endif -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o +targets += crtsavres-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) -targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both -ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) - -CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 -AS32FLAGS := -D__VDSO32__ -s +ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO +ccflags-y += $(DISABLE_LATENT_ENTROPY_PLUGIN) +ccflags-y += $(call cc-option, -fno-stack-protector) +ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -ffreestanding -fasynchronous-unwind-tables +ccflags-remove-y := $(CC_FLAGS_FTRACE) +ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS) +ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) +ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) + +# Filter flags that clang will warn are unused for linking +ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) + +CC32FLAGS := -m32 +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel +ifdef CONFIG_CC_IS_CLANG +# This flag is supported by clang for 64-bit but not 32-bit so it will cause +# an unused command line flag warning for this file. +CC32FLAGSREMOVE += -fno-stack-clash-protection +# -mstack-protector-guard values from the 64-bit build are not valid for the +# 32-bit one. clang validates the values passed to these arguments during +# parsing, even when -fno-stack-protector is passed afterwards. +CC32FLAGSREMOVE += -mstack-protector-guard% +endif +LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 +AS32FLAGS := -D__VDSO32__ -CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 -AS64FLAGS := -D__VDSO64__ -s +LD64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE +$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE $(call if_changed,vdso32ld_and_check) -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files $(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/crtsavres-32.o: %-32.o: $(srctree)/arch/powerpc/lib/crtsavres.S FORCE + $(call if_changed_dep,vdso32as) $(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) +$(obj)/vgetrandom-32.o: %-32.o: %.c FORCE + $(call if_changed_dep,vdso32cc) $(obj-vdso64): %-64.o: %.S FORCE $(call if_changed_dep,vdso64as) $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE $(call if_changed_dep,cc_o_c) +$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script -gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +gen-vdso32sym := $(src)/gen_vdso32_offsets.sh quiet_cmd_vdso32sym = VDSO32SYM $@ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ -gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +gen-vdso64sym := $(src)/gen_vdso64_offsets.sh quiet_cmd_vdso64sym = VDSO64SYM $@ cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ @@ -92,15 +111,13 @@ include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ - cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) + cmd_vdso32ld_and_check = $(VDSOCC) $(ldflags-y) $(CC32FLAGS) $(LD32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + cmd_vdso32cc = $(VDSOCC) $(filter-out $(CC32FLAGSREMOVE), $(c_flags)) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) + cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< - -OBJECT_FILES_NON_STANDARD := y + cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index 0085ae464dac..488d3ade11e6 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10 + get_datapage r10 vdso_u_arch_data mtlr r12 .cfi_restore lr #endif diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index db8e167f0166..d23b2e8e2a34 100644 --- a/arch/powerpc/kernel/vdso/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_datapage r3 + get_datapage r3 vdso_u_arch_data mtlr r12 #ifdef __powerpc64__ addi r3,r3,CFG_SYSCALL_MAP64 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3 + get_datapage r3 vdso_u_arch_data #ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) #endif diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S new file mode 100644 index 000000000000..a80d9fb436f7 --- /dev/null +++ b/arch/powerpc/kernel/vdso/getrandom.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Userland implementation of getrandom() for processes + * for use in the vDSO + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +/* + * The macro sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + mflr r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT +#endif + bl CFUNC(DOTSYM(\funct)) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_restore r2 +#endif + cmpwi r3, 0 + mtlr r0 + addi r1, r1, 2 * PPC_MIN_STKFRM + .cfi_restore lr + .cfi_def_cfa_offset 0 + crclr so + bgelr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + + .text +V_FUNCTION_BEGIN(__kernel_getrandom) + cvdso_call __c_kernel_getrandom +V_FUNCTION_END(__kernel_getrandom) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 0c4ecc8fec5a..79c967212444 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -32,13 +32,12 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif - get_datapage r5 .ifeq \call_time - addi r5, r5, VDSO_DATA_OFFSET + get_datapage r5 vdso_u_time_data .else - addi r4, r5, VDSO_DATA_OFFSET + get_datapage r4 vdso_u_time_data .endif - bl DOTSYM(\funct) + bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) @@ -114,16 +113,3 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) - -/* Routines for restoring integer registers, called by the compiler. */ -/* Called with r11 pointing to the stack header word of the caller of the */ -/* function, just beyond the end of the integer restore area. */ -#ifndef __powerpc64__ -_GLOBAL(_restgpr_31_x) -_GLOBAL(_rest32gpr_31_x) - lwz r0,4(r11) - lwz r31,-4(r11) - mtlr r0 - mr r1,r11 - blr -#endif diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index bc0be274a9ac..72a1012b8a20 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -6,6 +6,7 @@ #include <asm/vdso.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -74,6 +76,8 @@ SECTIONS .got : { *(.got) } :text .plt : { *(.plt) } + .rela.dyn : { *(.rela .rela*) } + _end = .; __end = .; PROVIDE(end = .); @@ -83,9 +87,11 @@ SECTIONS /DISCARD/ : { *(.note.GNU-stack) + *(*.EMB.apuinfo) + *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.got1) + *(.got1 .glink .iplt) } } @@ -126,6 +132,7 @@ VERSION #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) __kernel_getcpu; #endif + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 744ae5363e6c..32102a05eaa7 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -6,6 +6,7 @@ #include <asm/vdso.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common64) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -32,7 +34,7 @@ SECTIONS . = ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - *(.sfpr .glink) + *(.sfpr) } :text PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -69,7 +71,7 @@ SECTIONS .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .gcc_except_table : { *(.gcc_except_table) } - .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .rela.dyn ALIGN(8) : { *(.rela .rela*) } .got ALIGN(8) : { *(.got .toc) } @@ -81,10 +83,12 @@ SECTIONS /DISCARD/ : { *(.note.GNU-stack) + *(*.EMB.apuinfo) *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) *(.opd) + *(.glink .iplt .plt) } } @@ -121,6 +125,7 @@ VERSION __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..7f9061a9e8b4 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ + +#include <linux/linkage.h> + +#include <asm/ppc_asm.h> + +#define dst_bytes r3 +#define key r4 +#define counter r5 +#define nblocks r6 + +#define idx_r0 r0 +#define val4 r4 + +#define const0 0x61707865 +#define const1 0x3320646e +#define const2 0x79622d32 +#define const3 0x6b206574 + +#define key0 r5 +#define key1 r6 +#define key2 r7 +#define key3 r8 +#define key4 r9 +#define key5 r10 +#define key6 r11 +#define key7 r12 + +#define counter0 r14 +#define counter1 r15 + +#define state0 r16 +#define state1 r17 +#define state2 r18 +#define state3 r19 +#define state4 r20 +#define state5 r21 +#define state6 r22 +#define state7 r23 +#define state8 r24 +#define state9 r25 +#define state10 r26 +#define state11 r27 +#define state12 r28 +#define state13 r29 +#define state14 r30 +#define state15 r31 + +.macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 16 + rotlwi \d2, \d2, 16 + rotlwi \d3, \d3, 16 + rotlwi \d4, \d4, 16 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 12 + rotlwi \b2, \b2, 12 + rotlwi \b3, \b3, 12 + rotlwi \b4, \b4, 12 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 8 + rotlwi \d2, \d2, 8 + rotlwi \d3, \d3, 8 + rotlwi \d4, \d4, 8 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 7 + rotlwi \b2, \b2, 7 + rotlwi \b3, \b3, 7 + rotlwi \b4, \b4, 7 +.endm + +#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4) \ + quarterround4 state##a1 state##b1 state##c1 state##d1 \ + state##a2 state##b2 state##c2 state##d2 \ + state##a3 state##b3 state##c3 state##d3 \ + state##a4 state##b4 state##c4 state##d4 + +/* + * Very basic 32 bits implementation of ChaCha20. Produces a given positive number + * of blocks of output with a nonce of 0, taking an input key and 8-byte + * counter. Importantly does not spill to the stack. Its arguments are: + * + * r3: output bytes + * r4: 32-byte key input + * r5: 8-byte counter input/output (saved on stack) + * r6: number of 64-byte blocks to write to output + * + * r0: counter of blocks (initialised with r6) + * r4: Value '4' after key has been read. + * r5-r12: key + * r14-r15: counter + * r16-r31: state + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) +#ifdef __powerpc64__ + std counter, -216(r1) + + std r14, -144(r1) + std r15, -136(r1) + std r16, -128(r1) + std r17, -120(r1) + std r18, -112(r1) + std r19, -104(r1) + std r20, -96(r1) + std r21, -88(r1) + std r22, -80(r1) + std r23, -72(r1) + std r24, -64(r1) + std r25, -56(r1) + std r26, -48(r1) + std r27, -40(r1) + std r28, -32(r1) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) +#else + stwu r1, -96(r1) + stw counter, 20(r1) +#ifdef __BIG_ENDIAN__ + stmw r14, 24(r1) +#else + stw r14, 24(r1) + stw r15, 28(r1) + stw r16, 32(r1) + stw r17, 36(r1) + stw r18, 40(r1) + stw r19, 44(r1) + stw r20, 48(r1) + stw r21, 52(r1) + stw r22, 56(r1) + stw r23, 60(r1) + stw r24, 64(r1) + stw r25, 68(r1) + stw r26, 72(r1) + stw r27, 76(r1) + stw r28, 80(r1) + stw r29, 84(r1) + stw r30, 88(r1) + stw r31, 92(r1) +#endif +#endif /* __powerpc64__ */ + + lwz counter0, 0(counter) + lwz counter1, 4(counter) +#ifdef __powerpc64__ + rldimi counter0, counter1, 32, 0 +#endif + mr idx_r0, nblocks + subi dst_bytes, dst_bytes, 4 + + lwz key0, 0(key) + lwz key1, 4(key) + lwz key2, 8(key) + lwz key3, 12(key) + lwz key4, 16(key) + lwz key5, 20(key) + lwz key6, 24(key) + lwz key7, 28(key) + + li val4, 4 +.Lblock: + li r31, 10 + + lis state0, const0@ha + lis state1, const1@ha + lis state2, const2@ha + lis state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + mtctr r31 + + mr state4, key0 + mr state5, key1 + mr state6, key2 + mr state7, key3 + mr state8, key4 + mr state9, key5 + mr state10, key6 + mr state11, key7 + + mr state12, counter0 + mr state13, counter1 + + li state14, 0 + li state15, 0 + +.Lpermute: + QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15) + QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14) + + bdnz .Lpermute + + addis state0, state0, const0@ha + addis state1, state1, const1@ha + addis state2, state2, const2@ha + addis state3, state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + add state4, state4, key0 + add state5, state5, key1 + add state6, state6, key2 + add state7, state7, key3 + add state8, state8, key4 + add state9, state9, key5 + add state10, state10, key6 + add state11, state11, key7 + + add state12, state12, counter0 + add state13, state13, counter1 + +#ifdef __BIG_ENDIAN__ + stwbrx state0, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state1, 0, dst_bytes + stwbrx state2, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state3, 0, dst_bytes + stwbrx state4, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state5, 0, dst_bytes + stwbrx state6, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state7, 0, dst_bytes + stwbrx state8, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state9, 0, dst_bytes + stwbrx state10, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state11, 0, dst_bytes + stwbrx state12, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state13, 0, dst_bytes + stwbrx state14, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state15, 0, dst_bytes +#else + stw state0, 4(dst_bytes) + stw state1, 8(dst_bytes) + stw state2, 12(dst_bytes) + stw state3, 16(dst_bytes) + stw state4, 20(dst_bytes) + stw state5, 24(dst_bytes) + stw state6, 28(dst_bytes) + stw state7, 32(dst_bytes) + stw state8, 36(dst_bytes) + stw state9, 40(dst_bytes) + stw state10, 44(dst_bytes) + stw state11, 48(dst_bytes) + stw state12, 52(dst_bytes) + stw state13, 56(dst_bytes) + stw state14, 60(dst_bytes) + stwu state15, 64(dst_bytes) +#endif + + subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */ + +#ifdef __powerpc64__ + addi counter0, counter0, 1 + srdi counter1, counter0, 32 +#else + addic counter0, counter0, 1 + addze counter1, counter1 +#endif + + bne .Lblock + +#ifdef __powerpc64__ + ld counter, -216(r1) +#else + lwz counter, 20(r1) +#endif + stw counter0, 0(counter) + stw counter1, 4(counter) + + li r6, 0 + li r7, 0 + li r8, 0 + li r9, 0 + li r10, 0 + li r11, 0 + li r12, 0 + +#ifdef __powerpc64__ + ld r14, -144(r1) + ld r15, -136(r1) + ld r16, -128(r1) + ld r17, -120(r1) + ld r18, -112(r1) + ld r19, -104(r1) + ld r20, -96(r1) + ld r21, -88(r1) + ld r22, -80(r1) + ld r23, -72(r1) + ld r24, -64(r1) + ld r25, -56(r1) + ld r26, -48(r1) + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) +#else +#ifdef __BIG_ENDIAN__ + lmw r14, 24(r1) +#else + lwz r14, 24(r1) + lwz r15, 28(r1) + lwz r16, 32(r1) + lwz r17, 36(r1) + lwz r18, 40(r1) + lwz r19, 44(r1) + lwz r20, 48(r1) + lwz r21, 52(r1) + lwz r22, 56(r1) + lwz r23, 60(r1) + lwz r24, 64(r1) + lwz r25, 68(r1) + lwz r26, 72(r1) + lwz r27, 76(r1) + lwz r28, 80(r1) + lwz r29, 84(r1) + lwz r30, 88(r1) + lwz r31, 92(r1) +#endif + addi r1, r1, 96 +#endif /* __powerpc64__ */ + blr +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..cc79b960a541 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementation of getrandom() + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#include <linux/time.h> +#include <linux/types.h> + +ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, + size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c index 55a287c9a736..6f5167d81af5 100644 --- a/arch/powerpc/kernel/vdso/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso/vgettimeofday.c @@ -7,43 +7,43 @@ #ifdef __powerpc64__ int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_data(vd, clock_id, res); } #else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime32_data(vd, clock, ts); } int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_time32_data(vd, clock_id, res); } #endif int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_gettimeofday_data(vd, tv, tz); } -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_time_data *vd) { return __cvdso_time_data(vd, time); } diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 10f92f265d51..20bca3548b44 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 839d1a61411d..1912936fa227 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso64_start, vdso64_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ffe5d90abe17..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/processor.h> #include <asm/ppc_asm.h> @@ -8,7 +9,6 @@ #include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/asm-compat.h> /* @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX @@ -177,10 +179,16 @@ fpone: fphalf: .quad 0x3fe0000000000000 /* 0.5 */ +#ifdef CONFIG_PPC_KERNEL_PCREL +#define LDCONST(fr, name) \ + pla r11,name@pcrel; \ + lfd fr,0(r11) +#else #define LDCONST(fr, name) \ addis r11,r2,name@toc@ha; \ lfd fr,name@toc@l(r11) #endif +#endif .text /* * Internal routine to enable floating point and set FPSCR to 0. diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 958e77a24f85..15850296c0a9 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,10 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_PPC64 -#define PROVIDE32(x) PROVIDE(__unused__##x) -#else -#define PROVIDE32(x) PROVIDE(x) -#endif - #define BSS_FIRST_SECTIONS *(.bss.prominit) #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 @@ -107,12 +101,9 @@ SECTIONS #endif /* careful! __ftr_alt_* sections need to be close to .text */ *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); -#ifdef CONFIG_PPC64 *(.tramp.ftrace.text); -#endif NOINSTR_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT @@ -125,13 +116,11 @@ SECTIONS * included with the main text sections, so put it by itself. */ *(.sfpr); - MEM_KEEP(init.text) - MEM_KEEP(exit.text) + *(.text.asan.* .text.tsan.*) } :text . = ALIGN(PAGE_SIZE); _etext = .; - PROVIDE32 (etext = .); /* Read-only data */ RO_DATA(PAGE_SIZE) @@ -169,12 +158,18 @@ SECTIONS } #else /* CONFIG_PPC32 */ +#ifndef CONFIG_PPC_KERNEL_PCREL .toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) { *(.toc1) } +#endif .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { +#ifdef CONFIG_PPC_KERNEL_PCREL + *(.got) +#else *(.got .toc) +#endif } SOFT_MASK_TABLE(8) @@ -263,23 +258,22 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT - + *(.tramp.ftrace.init); /* *.init.text might be RO so we must ensure this section ends on * a page boundary. */ . = ALIGN(PAGE_SIZE); _einittext = .; -#ifdef CONFIG_PPC64 - *(.tramp.ftrace.init); -#endif } :text /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + __exittext_begin = .; EXIT_TEXT + __exittext_end = .; } . = ALIGN(PAGE_SIZE); @@ -336,7 +330,6 @@ SECTIONS } .hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) } .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) } - .interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) } .rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET) { __rela_dyn_start = .; @@ -393,7 +386,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); _edata = .; - PROVIDE32 (edata = .); /* * And finally the bss @@ -403,7 +395,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; - PROVIDE32 (end = .); DWARF_DEBUG ELF_DETAILS diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dbcc4a793f0b..2429cb1c7baa 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -245,7 +245,7 @@ static void watchdog_smp_panic(int cpu) __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); } } else { - trigger_allbutself_cpu_backtrace(); + trigger_allbutcpu_cpu_backtrace(cpu); cpumask_clear(&wd_smp_cpus_ipi); } @@ -416,7 +416,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi if (sysctl_hardlockup_all_cpu_backtrace) - trigger_allbutself_cpu_backtrace(); + trigger_allbutcpu_cpu_backtrace(cpu); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); @@ -438,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return HRTIMER_NORESTART; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -479,7 +479,7 @@ static void start_watchdog(void *arg) return; } - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -495,8 +495,7 @@ static void start_watchdog(void *arg) *this_cpu_ptr(&wd_timer_tb) = get_tb(); - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = watchdog_timer_fn; + hrtimer_setup(hrtimer, watchdog_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), HRTIMER_MODE_REL_PINNED); } @@ -546,7 +545,7 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_stop(void) +void watchdog_hardlockup_stop(void) { int cpu; @@ -554,7 +553,7 @@ void watchdog_nmi_stop(void) stop_watchdog_on_cpu(cpu); } -void watchdog_nmi_start(void) +void watchdog_hardlockup_start(void) { int cpu; @@ -566,7 +565,7 @@ void watchdog_nmi_start(void) /* * Invoked from core watchdog init. */ -int __init watchdog_nmi_probe(void) +int __init watchdog_hardlockup_probe(void) { int err; @@ -582,7 +581,7 @@ int __init watchdog_nmi_probe(void) } #ifdef CONFIG_PPC_PSERIES -void watchdog_nmi_set_timeout_pct(u64 pct) +void watchdog_hardlockup_set_timeout_pct(u64 pct) { pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); WRITE_ONCE(wd_timeout_pct, pct); |
