Diffstat (limited to 'arch/s390/kernel')
101 files changed, 3984 insertions, 5845 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..42c83d60d6fa 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -36,16 +36,17 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls -obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o +obj-y := head.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o -obj-y += entry.o reipl.o kdebugfs.o alternative.o +obj-y += entry.o reipl.o kdebugfs.o alternative.o skey.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) @@ -55,9 +56,6 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o -compat-obj-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o -obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += mcount.o @@ -69,7 +67,7 @@ obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o - +obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o obj-$(CONFIG_CERT_STORE) += cert_store.o @@ -78,10 +76,9 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o -obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o +obj-$(CONFIG_PERF_EVENTS) += perf_pai.o obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso -obj-y += vdso64/ -obj-$(CONFIG_COMPAT) += vdso32/ +obj-y += vdso/ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 09cd24cbe74e..6252b7d115dd 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -2,9 +2,9 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> +#include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); -int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8d5d0de35de0..90c0e6408992 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,41 +1,90 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + #include <linux/uaccess.h> +#include <linux/printk.h> #include <asm/nospec-branch.h> #include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef 
__kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - u8 *instr, *replacement; + struct alt_debug *d; struct alt_instr *a; - bool replace; + bool debug, replace; + u8 *old, *new; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ + d = &alt_debug; for (a = start; a < end; a++) { if (!(a->ctx & ctx)) continue; switch (a->type) { case ALT_TYPE_FACILITY: replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); break; case ALT_TYPE_SPEC: replace = nobp_enabled(); - break; - case ALT_TYPE_LOWCORE: - replace = have_relocated_lowcore(); + debug = d->spec; break; default: replace = false; + debug = false; } if (!replace) continue; - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - s390_kernel_write(instr, replacement, a->instrlen); + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); } } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 862a9140528e..e1a5b5b54e4f 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -4,16 +4,16 @@ * This code generates raw asm output which is post-processed to extract * and format the required data. 
*/ - -#define ASM_OFFSETS_C +#define COMPILE_OFFSETS #include <linux/kbuild.h> -#include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> #include <linux/pgtable.h> -#include <linux/ftrace.h> +#include <linux/ftrace_regs.h> +#include <asm/kvm_host_types.h> #include <asm/stacktrace.h> +#include <asm/ptrace.h> int main(void) { @@ -21,6 +21,9 @@ int main(void) OFFSET(__TASK_stack, task_struct, stack); OFFSET(__TASK_thread, task_struct, thread); OFFSET(__TASK_pid, task_struct, pid); +#ifdef CONFIG_STACKPROTECTOR + OFFSET(__TASK_stack_canary, task_struct, stack_canary); +#endif BLANK(); /* thread struct offsets */ OFFSET(__THREAD_ksp, thread_struct, ksp); @@ -49,8 +52,8 @@ int main(void) OFFSET(__PT_R14, pt_regs, gprs[14]); OFFSET(__PT_R15, pt_regs, gprs[15]); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); OFFSET(__PT_FLAGS, pt_regs, flags); - OFFSET(__PT_CR1, pt_regs, cr1); OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -64,6 +67,7 @@ int main(void) OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce); + OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq); DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain); @@ -76,7 +80,8 @@ int main(void) OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); OFFSET(__LC_PER_CODE, lowcore, per_code); @@ -122,7 +127,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); @@ -139,6 +143,7 @@ int main(void) OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ + OFFSET(__LC_STACK_CANARY, lowcore, stack_canary); OFFSET(__LC_DUMP_REIPL, lowcore, ipib); OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); OFFSET(__LC_OS_INFO, lowcore, os_info); @@ -175,12 +180,6 @@ int main(void) DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* function graph return value tracing */ - OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); - OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); - DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); -#endif OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c index 02051a596b87..7897d9411e13 100644 --- a/arch/s390/kernel/audit.c +++ b/arch/s390/kernel/audit.c @@ -3,7 +3,6 @@ #include <linux/types.h> #include <linux/audit.h> #include <asm/unistd.h> 
-#include "audit.h" static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> @@ -32,19 +31,11 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { -#ifdef CONFIG_COMPAT - if (arch == AUDIT_ARCH_S390) - return 1; -#endif return 0; } int audit_classify_syscall(int abi, unsigned syscall) { -#ifdef CONFIG_COMPAT - if (abi == AUDIT_ARCH_S390) - return s390_classify_syscall(syscall); -#endif switch(syscall) { case __NR_open: return AUDITSC_OPEN; @@ -63,13 +54,6 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { -#ifdef CONFIG_COMPAT - audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); - audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); - audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class); -#endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h deleted file mode 100644 index 4d4b596412ec..000000000000 --- a/arch/s390/kernel/audit.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ARCH_S390_KERNEL_AUDIT_H -#define __ARCH_S390_KERNEL_AUDIT_H - -#include <linux/types.h> - -#ifdef CONFIG_COMPAT -extern int s390_classify_syscall(unsigned); -extern __u32 s390_dir_class[]; -extern __u32 s390_write_class[]; -extern __u32 s390_read_class[]; -extern __u32 s390_chattr_class[]; -extern __u32 s390_signal_class[]; -#endif /* CONFIG_COMPAT */ - -#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index bf983513dd33..c217a5e64094 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m) * First 64 bytes of the key description is key name in EBCDIC CP 500. * Convert it to ASCII for displaying in /proc/keys. 
*/ - strscpy(ascii, key->description, sizeof(ascii)); + strscpy(ascii, key->description); EBCASC_500(ascii, VC_NAME_LEN_BYTES); seq_puts(m, ascii); @@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr) { union register_pair rp = { .even = (unsigned long)addr, }; - asm volatile( + asm_inline volatile( " diag %[rp],%[subcode],0x320\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c deleted file mode 100644 index a7c46e8310f0..000000000000 --- a/arch/s390/kernel/compat_audit.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#undef __s390x__ -#include <linux/audit_arch.h> -#include <asm/unistd.h> -#include "audit.h" - -unsigned s390_dir_class[] = { -#include <asm-generic/audit_dir_write.h> -~0U -}; - -unsigned s390_chattr_class[] = { -#include <asm-generic/audit_change_attr.h> -~0U -}; - -unsigned s390_write_class[] = { -#include <asm-generic/audit_write.h> -~0U -}; - -unsigned s390_read_class[] = { -#include <asm-generic/audit_read.h> -~0U -}; - -unsigned s390_signal_class[] = { -#include <asm-generic/audit_signal.h> -~0U -}; - -int s390_classify_syscall(unsigned syscall) -{ - switch(syscall) { - case __NR_open: - return AUDITSC_OPEN; - case __NR_openat: - return AUDITSC_OPENAT; - case __NR_socketcall: - return AUDITSC_SOCKETCALL; - case __NR_execve: - return AUDITSC_EXECVE; - case __NR_openat2: - return AUDITSC_OPENAT2; - default: - return AUDITSC_COMPAT; - } -} diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c deleted file mode 100644 index f9d418d1b619..000000000000 --- a/arch/s390/kernel/compat_linux.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * S390 version - * Copyright IBM Corp. 2000 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Gerhard Tonn (ton@de.ibm.com) - * Thomas Spatzier (tspat@de.ibm.com) - * - * Conversion between 31bit and 64bit native syscalls. - * - * Heavily inspired by the 32-bit Sparc compat code which is - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. 
Miller (davem@caip.rutgers.edu) - * - */ - - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/file.h> -#include <linux/signal.h> -#include <linux/resource.h> -#include <linux/times.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/uio.h> -#include <linux/quota.h> -#include <linux/poll.h> -#include <linux/personality.h> -#include <linux/stat.h> -#include <linux/filter.h> -#include <linux/highmem.h> -#include <linux/mman.h> -#include <linux/ipv6.h> -#include <linux/in.h> -#include <linux/icmpv6.h> -#include <linux/syscalls.h> -#include <linux/sysctl.h> -#include <linux/binfmts.h> -#include <linux/capability.h> -#include <linux/compat.h> -#include <linux/vfs.h> -#include <linux/ptrace.h> -#include <linux/fadvise.h> -#include <linux/ipc.h> -#include <linux/slab.h> - -#include <asm/types.h> -#include <linux/uaccess.h> - -#include <net/scm.h> -#include <net/sock.h> - -#include "compat_linux.h" - -#ifdef CONFIG_SYSVIPC -COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, - compat_ulong_t, third, compat_uptr_t, ptr) -{ - if (call >> 16) /* hack for backward compatibility */ - return -EINVAL; - return compat_ksys_ipc(call, first, second, third, ptr, third); -} -#endif - -COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) -{ - return ksys_truncate(path, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) -{ - return ksys_ftruncate(fd, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, - compat_size_t, count, u32, high, u32, low) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, - compat_size_t, count, u32, high, u32, low) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) -{ - return ksys_readahead(fd, (unsigned long)high << 32 | low, count); -} - -struct stat64_emu31 { - unsigned long long st_dev; - unsigned int __pad1; -#define STAT64_HAS_BROKEN_ST_INO 1 - u32 __st_ino; - unsigned int st_mode; - unsigned int st_nlink; - u32 st_uid; - u32 st_gid; - unsigned long long st_rdev; - unsigned int __pad3; - long st_size; - u32 st_blksize; - unsigned char __pad4[4]; - u32 __pad5; /* future possible st_blocks high bits */ - u32 st_blocks; /* Number 512-byte blocks allocated. 
*/ - u32 st_atime; - u32 __pad6; - u32 st_mtime; - u32 __pad7; - u32 st_ctime; - u32 __pad8; /* will be high 32 bits of ctime someday */ - unsigned long st_ino; -}; - -static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) -{ - struct stat64_emu31 tmp; - - memset(&tmp, 0, sizeof(tmp)); - - tmp.st_dev = huge_encode_dev(stat->dev); - tmp.st_ino = stat->ino; - tmp.__st_ino = (u32)stat->ino; - tmp.st_mode = stat->mode; - tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); - tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); - tmp.st_rdev = huge_encode_dev(stat->rdev); - tmp.st_size = stat->size; - tmp.st_blksize = (u32)stat->blksize; - tmp.st_blocks = (u32)stat->blocks; - tmp.st_atime = (u32)stat->atime.tv_sec; - tmp.st_mtime = (u32)stat->mtime.tv_sec; - tmp.st_ctime = (u32)stat->ctime.tv_sec; - - return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; -} - -COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) -{ - struct kstat stat; - int ret = vfs_stat(filename, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) -{ - struct kstat stat; - int ret = vfs_lstat(filename, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) -{ - struct kstat stat; - int ret = vfs_fstat(fd, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, - struct stat64_emu31 __user *, statbuf, int, flag) -{ - struct kstat stat; - int error; - - error = vfs_fstatat(dfd, filename, &stat, flag); - if (error) - return error; - return cp_stat64(statbuf, &stat); -} - -/* - * Linux/i386 didn't use to be able to handle more than - * 4 system call parameters, so these system calls used a memory - * block for parameter passing.. - */ - -struct mmap_arg_struct_emu31 { - compat_ulong_t addr; - compat_ulong_t len; - compat_ulong_t prot; - compat_ulong_t flags; - compat_ulong_t fd; - compat_ulong_t offset; -}; - -COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) -{ - struct mmap_arg_struct_emu31 a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - if (a.offset & ~PAGE_MASK) - return -EINVAL; - return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); -} - -COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) -{ - struct mmap_arg_struct_emu31 a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -} - -COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - - return ksys_read(fd, buf, count); -} - -COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - - return ksys_write(fd, buf, count); -} - -/* - * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. - * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} - * because the 31 bit values differ from the 64 bit values. 
- */ - -COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) -{ - if (advise == 4) - advise = POSIX_FADV_DONTNEED; - else if (advise == 5) - advise = POSIX_FADV_NOREUSE; - return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len, - advise); -} - -struct fadvise64_64_args { - int fd; - long long offset; - long long len; - int advice; -}; - -COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) -{ - struct fadvise64_64_args a; - - if ( copy_from_user(&a, args, sizeof(a)) ) - return -EFAULT; - if (a.advice == 4) - a.advice = POSIX_FADV_DONTNEED; - else if (a.advice == 5) - a.advice = POSIX_FADV_NOREUSE; - return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice); -} - -COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, - u32, nhigh, u32, nlow, unsigned int, flags) -{ - return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, - ((u64)nhigh << 32) + nlow, flags); -} - -COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, - u32, lenhigh, u32, lenlow) -{ - return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, - ((u64)lenhigh << 32) + lenlow); -} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h deleted file mode 100644 index ef23739b277c..000000000000 --- a/arch/s390/kernel/compat_linux.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390X_S390_H -#define _ASM_S390X_S390_H - -#include <linux/compat.h> -#include <linux/socket.h> -#include <linux/syscalls.h> -#include <asm/ptrace.h> - -/* - * Macro that masks the high order bit of a 32 bit pointer and - * converts it to a 64 bit pointer. - */ -#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL)) -#define AA(__x) ((unsigned long)(__x)) - -/* Now 32bit compatibility types */ -struct ipc_kludge_32 { - __u32 msgp; /* pointer */ - __s32 msgtyp; -}; - -/* asm/sigcontext.h */ -typedef union { - __u64 d; - __u32 f; -} freg_t32; - -typedef struct { - unsigned int fpc; - unsigned int pad; - freg_t32 fprs[__NUM_FPRS]; -} _s390_fp_regs32; - -typedef struct { - psw_t32 psw; - __u32 gprs[__NUM_GPRS]; - __u32 acrs[__NUM_ACRS]; -} _s390_regs_common32; - -typedef struct { - _s390_regs_common32 regs; - _s390_fp_regs32 fpregs; -} _sigregs32; - -typedef struct { - __u32 gprs_high[__NUM_GPRS]; - __u64 vxrs_low[__NUM_VXRS_LOW]; - __vector128 vxrs_high[__NUM_VXRS_HIGH]; - __u8 __reserved[128]; -} _sigregs_ext32; - -#define _SIGCONTEXT_NSIG32 64 -#define _SIGCONTEXT_NSIG_BPW32 32 -#define __SIGNAL_FRAMESIZE32 96 -#define _SIGMASK_COPY_SIZE32 (sizeof(u32) * 2) - -struct sigcontext32 { - __u32 oldmask[_COMPAT_NSIG_WORDS]; - __u32 sregs; /* pointer */ -}; - -/* asm/signal.h */ - -/* asm/ucontext.h */ -struct ucontext32 { - __u32 uc_flags; - __u32 uc_link; /* pointer */ - compat_stack_t uc_stack; - _sigregs32 uc_mcontext; - compat_sigset_t uc_sigmask; - /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. 
*/ - unsigned char __unused[128 - sizeof(compat_sigset_t)]; - _sigregs_ext32 uc_mcontext_ext; -}; - -struct stat64_emu31; -struct mmap_arg_struct_emu31; -struct fadvise64_64_args; - -long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); -long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); -long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); -long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); -long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); -long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); -long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); -long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); -long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); -long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); -long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); -long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count); -long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count); -long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); -long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); -long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); -long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); -long compat_sys_sigreturn(void); -long compat_sys_rt_sigreturn(void); - -#endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h deleted file mode 100644 index 3c400fc7e987..000000000000 --- a/arch/s390/kernel/compat_ptrace.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PTRACE32_H -#define _PTRACE32_H - -#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ -#include "compat_linux.h" /* needed for psw_compat_t */ - -struct compat_per_struct_kernel { - __u32 cr9; /* PER control bits */ - __u32 cr10; /* PER starting address */ - __u32 cr11; /* PER ending address */ - __u32 bits; /* Obsolete software bits */ - __u32 starting_addr; /* User specified start address */ - __u32 ending_addr; /* User specified end address */ - __u16 perc_atmid; /* PER trap ATMID */ - __u32 address; /* PER trap instruction address */ - __u8 access_id; /* PER trap access identification */ -}; - -struct compat_user_regs_struct -{ - psw_compat_t psw; - u32 gprs[NUM_GPRS]; - u32 acrs[NUM_ACRS]; - u32 orig_gpr2; - /* nb: there's a 4-byte hole here */ - s390_fp_regs fp_regs; - /* - * These per registers are in here so that gdb can modify them - * itself as there is no "official" ptrace interface for hardware - * watchpoints. This is the way intel does it. - */ - struct compat_per_struct_kernel per_info; - u32 ieee_instruction_pointer; /* obsolete, always 0 */ -}; - -struct compat_user { - /* We start with the registers, to mimic the way that "memory" - is returned from the ptrace(3,...) function. */ - struct compat_user_regs_struct regs; - /* The rest of this junk is to help gdb figure out what goes where */ - u32 u_tsize; /* Text segment size (pages). */ - u32 u_dsize; /* Data segment size (pages). 
*/ - u32 u_ssize; /* Stack segment size (pages). */ - u32 start_code; /* Starting virtual address of text. */ - u32 start_stack; /* Starting virtual address of stack area. - This is actually the bottom of the stack, - the top of the stack is always found in the - esp register. */ - s32 signal; /* Signal that caused the core dump. */ - u32 u_ar0; /* Used by gdb to help find the values for */ - /* the registers. */ - u32 magic; /* To uniquely identify a core file */ - char u_comm[32]; /* User command that was responsible */ -}; - -typedef struct -{ - __u32 len; - __u32 kernel_addr; - __u32 process_addr; -} compat_ptrace_area; - -#endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c deleted file mode 100644 index 5a86b9d1da71..000000000000 --- a/arch/s390/kernel/compat_signal.c +++ /dev/null @@ -1,420 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2000, 2006 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) - * Gerhard Tonn (ton@de.ibm.com) - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - */ - -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/tty.h> -#include <linux/personality.h> -#include <linux/binfmts.h> -#include <asm/vdso-symbols.h> -#include <asm/access-regs.h> -#include <asm/ucontext.h> -#include <linux/uaccess.h> -#include <asm/lowcore.h> -#include <asm/fpu.h> -#include "compat_linux.h" -#include "compat_ptrace.h" -#include "entry.h" - -typedef struct -{ - __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - struct sigcontext32 sc; - _sigregs32 sregs; - int signo; - _sigregs_ext32 sregs_ext; - __u16 svc_insn; /* Offset of svc_insn is NOT fixed! 
*/ -} sigframe32; - -typedef struct -{ - __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - __u16 svc_insn; - compat_siginfo_t info; - struct ucontext32 uc; -} rt_sigframe32; - -/* Store registers needed to create the signal frame */ -static void store_sigregs(void) -{ - save_access_regs(current->thread.acrs); - save_user_fpu_regs(); -} - -/* Load registers after signal return */ -static void load_sigregs(void) -{ - restore_access_regs(current->thread.acrs); -} - -static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) -{ - _sigregs32 user_sregs; - int i; - - user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32); - user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI; - user_sregs.regs.psw.mask |= PSW32_USER_BITS; - user_sregs.regs.psw.addr = (__u32) regs->psw.addr | - (__u32)(regs->psw.mask & PSW_MASK_BA); - for (i = 0; i < NUM_GPRS; i++) - user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; - memcpy(&user_sregs.regs.acrs, current->thread.acrs, - sizeof(user_sregs.regs.acrs)); - fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.ufpu); - if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) - return -EFAULT; - return 0; -} - -static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) -{ - _sigregs32 user_sregs; - int i; - - /* Always make any pending restarted system call return -EINTR */ - current->restart_block.fn = do_no_restart_syscall; - - if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) - return -EFAULT; - - if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI)) - return -EINVAL; - - /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ - regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | - (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 | - (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 | - (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE); - /* Check for invalid user address space control. 
*/ - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) - regs->psw.mask = PSW_ASC_PRIMARY | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; - memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, - sizeof(current->thread.acrs)); - fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, ¤t->thread.ufpu); - - clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ - return 0; -} - -static int save_sigregs_ext32(struct pt_regs *regs, - _sigregs_ext32 __user *sregs_ext) -{ - __u32 gprs_high[NUM_GPRS]; - __u64 vxrs[__NUM_VXRS_LOW]; - int i; - - /* Save high gprs to signal stack */ - for (i = 0; i < NUM_GPRS; i++) - gprs_high[i] = regs->gprs[i] >> 32; - if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high, - sizeof(sregs_ext->gprs_high))) - return -EFAULT; - - /* Save vector registers to signal stack */ - if (cpu_has_vx()) { - for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = current->thread.ufpu.vxrs[i].low; - if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, - sizeof(sregs_ext->vxrs_low)) || - __copy_to_user(&sregs_ext->vxrs_high, - current->thread.ufpu.vxrs + __NUM_VXRS_LOW, - sizeof(sregs_ext->vxrs_high))) - return -EFAULT; - } - return 0; -} - -static int restore_sigregs_ext32(struct pt_regs *regs, - _sigregs_ext32 __user *sregs_ext) -{ - __u32 gprs_high[NUM_GPRS]; - __u64 vxrs[__NUM_VXRS_LOW]; - int i; - - /* Restore high gprs from signal stack */ - if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(sregs_ext->gprs_high))) - return -EFAULT; - for (i = 0; i < NUM_GPRS; i++) - *(__u32 *)®s->gprs[i] = gprs_high[i]; - - /* Restore vector registers from signal stack */ - if (cpu_has_vx()) { - if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, - sizeof(sregs_ext->vxrs_low)) || - __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW, - &sregs_ext->vxrs_high, - sizeof(sregs_ext->vxrs_high))) - return -EFAULT; - for (i = 0; i < __NUM_VXRS_LOW; i++) - current->thread.ufpu.vxrs[i].low = vxrs[i]; - } - return 0; -} - -COMPAT_SYSCALL_DEFINE0(sigreturn) -{ - struct pt_regs *regs = task_pt_regs(current); - sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; - sigset_t set; - - if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask)) - goto badframe; - set_current_blocked(&set); - save_user_fpu_regs(); - if (restore_sigregs32(regs, &frame->sregs)) - goto badframe; - if (restore_sigregs_ext32(regs, &frame->sregs_ext)) - goto badframe; - load_sigregs(); - return regs->gprs[2]; -badframe: - force_sig(SIGSEGV); - return 0; -} - -COMPAT_SYSCALL_DEFINE0(rt_sigreturn) -{ - struct pt_regs *regs = task_pt_regs(current); - rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; - sigset_t set; - - if (get_compat_sigset(&set, &frame->uc.uc_sigmask)) - goto badframe; - set_current_blocked(&set); - if (compat_restore_altstack(&frame->uc.uc_stack)) - goto badframe; - save_user_fpu_regs(); - if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) - goto badframe; - if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) - goto badframe; - load_sigregs(); - return regs->gprs[2]; -badframe: - force_sig(SIGSEGV); - return 0; -} - -/* - * Set up a signal frame. - */ - - -/* - * Determine which stack to use.. 
- */ -static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) -{ - unsigned long sp; - - /* Default to using normal stack */ - sp = (unsigned long) A(regs->gprs[15]); - - /* Overflow on alternate signal stack gives SIGSEGV. */ - if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) - return (void __user *) -1UL; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (! sas_ss_flags(sp)) - sp = current->sas_ss_sp + current->sas_ss_size; - } - - return (void __user *)((sp - frame_size) & -8ul); -} - -static int setup_frame32(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs) -{ - int sig = ksig->sig; - sigframe32 __user *frame; - unsigned long restorer; - size_t frame_size; - - /* - * gprs_high are always present for 31-bit compat tasks. - * The space for vector registers is only allocated if - * the machine supports it - */ - frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); - if (!cpu_has_vx()) - frame_size -= sizeof(frame->sregs_ext.vxrs_low) + - sizeof(frame->sregs_ext.vxrs_high); - frame = get_sigframe(&ksig->ka, regs, frame_size); - if (frame == (void __user *) -1UL) - return -EFAULT; - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) - return -EFAULT; - - /* Create struct sigcontext32 on the signal stack */ - if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask, - set, sizeof(compat_sigset_t))) - return -EFAULT; - if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs)) - return -EFAULT; - - /* Store registers needed to create the signal frame */ - store_sigregs(); - - /* Create _sigregs32 on the signal stack */ - if (save_sigregs32(regs, &frame->sregs)) - return -EFAULT; - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) - return -EFAULT; - - /* Create _sigregs_ext32 on the signal stack */ - if (save_sigregs_ext32(regs, &frame->sregs_ext)) - return -EFAULT; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ksig->ka.sa.sa_flags & SA_RESTORER) { - restorer = (unsigned long __force) - ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; - } else { - restorer = VDSO32_SYMBOL(current, sigreturn); - } - - /* Set up registers for signal handler */ - regs->gprs[14] = restorer; - regs->gprs[15] = (__force __u64) frame; - /* Force 31 bit amode and default user address space control. */ - regs->psw.mask = PSW_MASK_BA | - (PSW_USER_BITS & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler; - - regs->gprs[2] = sig; - regs->gprs[3] = (__force __u64) &frame->sc; - - /* We forgot to include these in the sigcontext. - To avoid breaking binary compatibility, they are passed as args. */ - if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || - sig == SIGTRAP || sig == SIGFPE) { - /* set extra registers only for synchronous signals */ - regs->gprs[4] = regs->int_code & 127; - regs->gprs[5] = regs->int_parm_long; - regs->gprs[6] = current->thread.last_break; - } - - return 0; -} - -static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs) -{ - rt_sigframe32 __user *frame; - unsigned long restorer; - size_t frame_size; - u32 uc_flags; - - frame_size = sizeof(*frame) - - sizeof(frame->uc.uc_mcontext_ext.__reserved); - /* - * gprs_high are always present for 31-bit compat tasks. 
- * The space for vector registers is only allocated if - * the machine supports it - */ - uc_flags = UC_GPRS_HIGH; - if (cpu_has_vx()) { - uc_flags |= UC_VXRS; - } else { - frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + - sizeof(frame->uc.uc_mcontext_ext.vxrs_high); - } - frame = get_sigframe(&ksig->ka, regs, frame_size); - if (frame == (void __user *) -1UL) - return -EFAULT; - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) - return -EFAULT; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ksig->ka.sa.sa_flags & SA_RESTORER) { - restorer = (unsigned long __force) - ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; - } else { - restorer = VDSO32_SYMBOL(current, rt_sigreturn); - } - - /* Create siginfo on the signal stack */ - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) - return -EFAULT; - - /* Store registers needed to create the signal frame */ - store_sigregs(); - - /* Create ucontext on the signal stack. */ - if (__put_user(uc_flags, &frame->uc.uc_flags) || - __put_user(0, &frame->uc.uc_link) || - __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || - save_sigregs32(regs, &frame->uc.uc_mcontext) || - put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) || - save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->gprs[14] = restorer; - regs->gprs[15] = (__force __u64) frame; - /* Force 31 bit amode and default user address space control. */ - regs->psw.mask = PSW_MASK_BA | - (PSW_USER_BITS & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler; - - regs->gprs[2] = ksig->sig; - regs->gprs[3] = (__force __u64) &frame->info; - regs->gprs[4] = (__force __u64) &frame->uc; - regs->gprs[5] = current->thread.last_break; - return 0; -} - -/* - * OK, we're invoking a handler - */ - -void handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs) -{ - int ret; - - /* Set up the stack frame */ - if (ksig->ka.sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame32(ksig, oldset, regs); - else - ret = setup_frame32(ksig, oldset, regs); - - signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP)); -} - diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index c8575dbc890d..9d85b4bc7036 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -3,8 +3,7 @@ * Copyright IBM Corp. 
2024 */ -#define KMSG_COMPONENT "cpacf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpacf: " fmt #include <linux/cpu.h> #include <linux/device.h> @@ -14,7 +13,7 @@ #define CPACF_QUERY(name, instruction) \ static ssize_t name##_query_raw_read(struct file *fp, \ struct kobject *kobj, \ - struct bin_attribute *attr, \ + const struct bin_attribute *attr, \ char *buf, loff_t offs, \ size_t count) \ { \ @@ -24,7 +23,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \ return -EOPNOTSUPP; \ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ } \ -static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) CPACF_QUERY(km, KM); CPACF_QUERY(kmc, KMC); @@ -40,20 +39,20 @@ CPACF_QUERY(prno, PRNO); CPACF_QUERY(kma, KMA); CPACF_QUERY(kdsa, KDSA); -#define CPACF_QAI(name, instruction) \ -static ssize_t name##_query_auth_info_raw_read( \ - struct file *fp, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, loff_t offs, \ - size_t count) \ -{ \ - cpacf_qai_t qai; \ - \ - if (!cpacf_qai(CPACF_##instruction, &qai)) \ - return -EOPNOTSUPP; \ - return memory_read_from_buffer(buf, count, &offs, &qai, \ - sizeof(qai)); \ -} \ -static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) CPACF_QAI(km, KM); CPACF_QAI(kmc, KMC); @@ -69,7 +68,7 @@ CPACF_QAI(prno, PRNO); CPACF_QAI(kma, KMA); CPACF_QAI(kdsa, KDSA); -static struct bin_attribute *cpacf_attrs[] = { +static const struct bin_attribute *const cpacf_attrs[] = { &bin_attr_km_query_raw, &bin_attr_kmc_query_raw, &bin_attr_kimd_query_raw, diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 2f4174b961de..ab611764642a 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -6,8 +6,7 @@ * Christian Borntraeger (cborntra@de.ibm.com), */ -#define KMSG_COMPONENT "cpcmd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpcmd: " fmt #include <linux/kernel.h> #include <linux/export.h> diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c index 1b2ae42a0c15..c9eef9ed876b 100644 --- a/arch/s390/kernel/cpufeature.c +++ b/arch/s390/kernel/cpufeature.c @@ -4,12 +4,15 @@ */ #include <linux/cpufeature.h> +#include <linux/export.h> #include <linux/bug.h> +#include <asm/machine.h> #include <asm/elf.h> enum { TYPE_HWCAP, TYPE_FACILITY, + TYPE_MACHINE, }; struct s390_cpu_feature { @@ -21,6 +24,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, + [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288}, }; /* @@ -38,6 +42,8 @@ int cpu_have_feature(unsigned int num) return !!(elf_hwcap & BIT(feature->num)); case TYPE_FACILITY: return test_facility(feature->num); + case TYPE_MACHINE: + return test_machine_feature(feature->num); default: WARN_ON_ONCE(1); return 0; diff --git 
a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index cd0c93a8fb8b..d4839de8ce9d 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -7,6 +7,7 @@ */ #include <linux/crash_dump.h> +#include <linux/export.h> #include <asm/lowcore.h> #include <linux/kernel.h> #include <linux/init.h> @@ -63,9 +64,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu) { struct save_area *sa; - sa = memblock_alloc(sizeof(*sa), 8); - if (!sa) - return NULL; + sa = memblock_alloc_or_panic(sizeof(*sa), 8); if (is_boot_cpu) list_add(&sa->list, &dump_save_areas); @@ -248,15 +247,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -281,10 +271,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -300,10 +288,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -324,18 +309,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -348,16 +331,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size 
+= nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -372,8 +355,8 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; - strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + strscpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -508,6 +491,19 @@ static int get_mem_chunk_cnt(void) return cnt; } +static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr, + unsigned long vaddr, unsigned long size) +{ + phdr->p_type = PT_LOAD; + phdr->p_vaddr = vaddr; + phdr->p_offset = paddr; + phdr->p_paddr = paddr; + phdr->p_filesz = size; + phdr->p_memsz = size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; +} + /* * Initialize ELF loads (new kernel) */ @@ -520,14 +516,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm) if (os_info_has_vm) old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); for_each_physmem_range(idx, &oldmem_type, &start, &end) { - phdr->p_type = PT_LOAD; - phdr->p_vaddr = old_identity_base + start; - phdr->p_offset = start; - phdr->p_paddr = start; - phdr->p_filesz = end - start; - phdr->p_memsz = end - start; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_align = PAGE_SIZE; + fill_ptload(phdr, start, old_identity_base + start, + end - start); phdr++; } } @@ -537,6 +527,22 @@ static bool os_info_has_vm(void) return os_info_old_value(OS_INFO_KASLR_OFFSET); } +#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM +/* + * Fill PT_LOAD for a physical memory range owned by a device and detected by + * its device driver. 
+ */ +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size) +{ + unsigned long old_identity_base = 0; + + if (os_info_has_vm()) + old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); + fill_ptload(phdr, paddr, old_identity_base + paddr, size); +} +#endif + /* * Prepare PT_LOAD type program header for kernel image region */ @@ -589,7 +595,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c index 8cc26cf2c64a..a0501f4c7e7a 100644 --- a/arch/s390/kernel/ctlreg.c +++ b/arch/s390/kernel/ctlreg.c @@ -5,6 +5,7 @@ #include <linux/irqflags.h> #include <linux/spinlock.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/smp.h> diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b3f2103694e4..71cdb6845dd7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -10,8 +10,7 @@ * Bugreports to: <Linux390@de.ibm.com> */ -#define KMSG_COMPONENT "s390dbf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "s390dbf: " fmt #include <linux/stddef.h> #include <linux/kernel.h> @@ -24,6 +23,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/math.h> #include <linux/minmax.h> #include <linux/debugfs.h> @@ -77,12 +77,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_prolog_pages_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); @@ -90,9 +92,8 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf); + char *out_buf, size_t out_buf_size, + const char *in_buf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -249,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, rc->level = level; rc->buf_size = buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strscpy(rc->name, name, sizeof(rc->name)); + strscpy(rc->name, name); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); @@ -350,7 +351,10 @@ static debug_info_t 
*debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -391,8 +395,10 @@ static int debug_format_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap, view, p_info->temp_buf); + if (view->prolog_proc) { + len += view->prolog_proc(id_snap, view, p_info->temp_buf, + sizeof(p_info->temp_buf)); + } goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ @@ -402,21 +408,31 @@ static int debug_format_entry(file_private_info_t *p_info) if (act_entry->clock == 0LL) goto out; /* empty entry */ - if (view->header_proc) + if (view->header_proc) { len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); - if (view->format_proc) + act_entry, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len); + } + if (view->format_proc) { len += view->format_proc(id_snap, view, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len, DEBUG_DATA(act_entry)); + } out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -424,10 +440,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -440,10 +456,87 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; +} + +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. 
+ * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. + */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); } /* @@ -485,7 +578,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } @@ -520,6 +613,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() @@ -528,7 +657,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -546,42 +675,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. 
/* Create debugfs entries and add to internal list.
*/ static void _debug_register(debug_info_t *id) { @@ -962,7 +1121,7 @@ static int s390dbf_procactive(const struct ctl_table *table, int write, return 0; } -static struct ctl_table s390dbf_table[] = { +static const struct ctl_table s390dbf_table[] = { { .procname = "debug_stoppable", .data = &debug_stoppable, @@ -1256,18 +1415,12 @@ static inline char *debug_get_user_string(const char __user *user_buf, { char *buffer; - buffer = kmalloc(user_len + 1, GFP_KERNEL); - if (!buffer) - return ERR_PTR(-ENOMEM); - if (copy_from_user(buffer, user_buf, user_len) != 0) { - kfree(buffer); - return ERR_PTR(-EFAULT); - } + buffer = memdup_user_nul(user_buf, user_len); + if (IS_ERR(buffer)) + return buffer; /* got the string, now strip linefeed. */ if (buffer[user_len - 1] == '\n') buffer[user_len - 1] = 0; - else - buffer[user_len] = 0; return buffer; } @@ -1292,9 +1445,9 @@ static inline int debug_get_uint(char *buf) */ static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { - return sprintf(out_buf, "%i\n", id->pages_per_area); + return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area); } /* @@ -1341,14 +1494,14 @@ out: * prints out actual debug level */ static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { int rc = 0; if (id->level == DEBUG_OFF_LEVEL) - rc = sprintf(out_buf, "-\n"); + rc = scnprintf(out_buf, out_buf_size, "-\n"); else - rc = sprintf(out_buf, "%i\n", id->level); + rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level); return rc; } @@ -1465,22 +1618,24 @@ out: * prints debug data in hex/ascii format */ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) + char *out_buf, size_t out_buf_size, const char *in_buf) { int i, rc = 0; - for (i = 0; i < id->buf_size; i++) - rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); - rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + rc += scnprintf(out_buf + rc, out_buf_size - rc, + "%02x ", ((unsigned char *)in_buf)[i]); + } + rc += scnprintf(out_buf + rc, out_buf_size - rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; if (isascii(c) && isprint(c)) - rc += sprintf(out_buf + rc, "%c", c); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c); else - rc += sprintf(out_buf + rc, "."); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "."); } - rc += sprintf(out_buf + rc, "\n"); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n"); return rc; } @@ -1488,7 +1643,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, * prints header for debug entry */ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) { unsigned long sec, usec; unsigned long caller; @@ -1505,22 +1661,22 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", - area, sec, usec, level, except_str, - entry->cpu, (void *)caller); + rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); /* * prints debug data sprintf-formatted: - * 
debug_sprinf_event/exception calls must be used together with this view + * debug_sprintf_event/exception calls must be used together with this view */ #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1533,8 +1689,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, goto out; /* bufsize of entry too small */ if (num_longs == 1) { /* no args, we use only the string */ - strcpy(out_buf, curr_event->string); - rc = strlen(curr_event->string); + rc = strscpy(out_buf, curr_event->string, out_buf_size); + if (rc == -E2BIG) + rc = out_buf_size; goto out; } @@ -1546,15 +1703,17 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); + rc = scnprintf(out_buf, out_buf_size, + curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..956aee6c4090 --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c index cdd6e31344fa..56b862ba9be8 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -17,7 +17,7 @@ #include <asm/trace/diag.h> #include <asm/sections.h> #include <asm/asm.h> -#include "entry.h" +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -51,8 +51,10 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; @@ -193,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad { union register_pair rp = { .even = *subcode, .odd = size }; - asm volatile( + asm_inline volatile( " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -284,7 +286,7 @@ int diag224(void *ptr) int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); - 
asm volatile("\n" + asm_inline volatile("\n" " diag %[type],%[addr],0x224\n" "0: lhi %[rc],0\n" "1:\n" diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..f411562aa7f6 --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag0x310. + * + * Copyright IBM Corp. 2025 + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/diag.h> +#include <asm/sclp.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return 
diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if (READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..fe325c2a2d0d --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 
2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/ioctl.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#include <asm/diag.h> +#include <asm/sclp.h> +#include <asm/timex.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_percpu_wq, &pibwork, 
nsecs_to_jiffies(PIBWORK_DELAY)); + first = false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..7080be946785 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include <linux/types.h> + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..efffe02ea02e --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 
2024 + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/types.h> + +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 94eb8168ea44..1cec93895b3a 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/export.h> #include <linux/kallsyms.h> #include <linux/reboot.h> #include <linux/kprobes.h> @@ -504,24 +503,27 @@ static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len) void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; + unsigned long addr, pswaddr; unsigned char code[64]; char buffer[128], *ptr; - unsigned long addr; int start, end, opsize, hops, i; + pswaddr = regs->psw.addr; + if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED)) + pswaddr = __forward_psw(regs->psw, regs->int_code >> 16); /* Get a snapshot of the 64 bytes surrounding the fault address. */ - for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { - addr = regs->psw.addr - 34 + start; + for (start = 32; start && pswaddr >= 34 - start; start -= 2) { + addr = pswaddr - 34 + start; if (copy_from_regs(regs, code + start - 2, (void *)addr, 2)) break; } for (end = 32; end < 64; end += 2) { - addr = regs->psw.addr + end - 32; + addr = pswaddr + end - 32; if (copy_from_regs(regs, code + end, (void *)addr, 2)) break; } /* Code snapshot usable ? 
*/ - if ((regs->psw.addr & 1) || start >= end) { + if ((pswaddr & 1) || start >= end) { printk("%s Code: Bad PSW.\n", mode); return; } @@ -544,12 +546,12 @@ void show_code(struct pt_regs *regs) while (start < end && hops < 8) { opsize = insn_length(code[start]); if (start + opsize == 32) - *ptr++ = '#'; + *ptr++ = '*'; else if (start == 32) *ptr++ = '>'; else *ptr++ = ' '; - addr = regs->psw.addr + start - 32; + addr = pswaddr + start - 32; ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..f9d52e05e01e 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> +#include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/debug.h> #include <asm/dis.h> @@ -154,12 +155,16 @@ static void show_last_breaking_event(struct pt_regs *regs) void show_registers(struct pt_regs *regs) { struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long pswaddr; char *mode; + pswaddr = regs->psw.addr; + if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED)) + pswaddr = __forward_psw(regs->psw, regs->int_code >> 16); mode = user_mode(regs) ? "User" : "Krnl"; - printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)pswaddr); if (!user_mode(regs)) - pr_cont(" (%pSR)", (void *)regs->psw.addr); + pr_cont(" (%pSR)", (void *)pswaddr); pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext, @@ -198,13 +203,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a3..b27239c03d79 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -4,10 +4,10 @@ * Author(s): Hongjie Yang <hongjie@us.ibm.com>, */ -#define KMSG_COMPONENT "setup" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "setup: " fmt #include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -20,7 +20,10 @@ #include <linux/kernel.h> #include <asm/asm-extable.h> #include <linux/memblock.h> +#include <linux/kasan.h> #include <asm/access-regs.h> +#include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/fpu.h> @@ -36,12 +39,14 @@ #include <asm/boot_data.h> #include "entry.h" -#define decompressor_handled_param(param) \ -static int __init ignore_decompressor_param_##param(char *s) \ +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ { \ return 0; \ } \ -early_param(#param, ignore_decompressor_param_##param) +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); 
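(Editorial note on the reworked early-parameter stubs above.) The extra __decompressor_handled_param(func, param) level exists so that a parameter whose name is not a valid C identifier, such as "debug-alternative" handled below, can still get an ignore stub: func supplies the identifier, param the name that gets stringified. The two-argument form roughly expands to:

static int __init ignore_decompressor_param_debug_alternative(char *s)
{
	return 0;
}
early_param("debug-alternative", ignore_decompressor_param_debug_alternative);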
@@ -50,6 +55,8 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -58,25 +65,10 @@ static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); + kasan_init_generic(); #endif } -static void __init reset_tod_clock(void) -{ - union tod_clock clk; - - if (store_tod_clock_ext_cc(&clk) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) - disabled_wait(); - - memset(&tod_clock_base, 0, sizeof(tod_clock_base)); - tod_clock_base.tod = TOD_UNIX_EPOCH; - get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; -} - /* * Initialize storage key for kernel pages */ @@ -95,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void) static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) -{ - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; - return; - } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Detect known hypervisors */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; - else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - get_lowcore()->machine_flags |= MACHINE_FLAG_VM; -} - /* Remove leading, trailing and double whitespace. */ static inline void strim_all(char *str) { @@ -133,6 +105,8 @@ static inline void strim_all(char *str) } } +char arch_hw_string[128]; + static noinline __init void setup_arch_string(void) { struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; @@ -145,20 +119,21 @@ static noinline __init void setup_arch_string(void) EBCASC(mach->type, sizeof(mach->type)); EBCASC(mach->model, sizeof(mach->model)); EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); - sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s", - mach->manufacturer, mach->type, - mach->model, mach->model_capacity); + scnprintf(mstr, sizeof(mstr), "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); strim_all(mstr); if (stsi(vm, 3, 2, 2) == 0 && vm->count) { EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); - sprintf(hvstr, "%-16.16s", vm->vm[0].cpi); + scnprintf(hvstr, sizeof(hvstr), "%-16.16s", vm->vm[0].cpi); strim_all(hvstr); } else { - sprintf(hvstr, "%s", - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + scnprintf(hvstr, sizeof(hvstr), "%s", + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? 
"KVM" : "unknown"); } + scnprintf(arch_hw_string, sizeof(arch_hw_string), "HW: %s (%s)", mstr, hvstr); dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } @@ -166,9 +141,8 @@ static __init void setup_topology(void) { int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -183,6 +157,7 @@ void __init __do_early_pgm_check(struct pt_regs *regs) regs->int_code = lc->pgm_int_code; regs->int_parm_long = lc->trans_exc_code; + regs->last_break = lc->pgm_last_break; ip = __rewind_psw(regs->psw, regs->int_code >> 16); /* Monitor Event? Might be a warning */ @@ -217,65 +192,10 @@ static noinline __init void setup_lowcore_early(void) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_diag9c(void) -{ - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - diag_stat_inc(DIAG_STAT_X09C); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; -} - -static __init void detect_machine_facilities(void) -{ - if (test_facility(8)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; - system_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(50) && test_facility(73)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_TE; - system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); - } - if (test_facility(51)) - get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) - system_ctl_set_bit(0, CR0_VECTOR_BIT); - if (test_facility(130)) - get_lowcore()->machine_flags |= MACHINE_FLAG_NX; - if (test_facility(133)) - get_lowcore()->machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base.tod >> 63)) { - /* Enabled signed clock comparator comparisons */ - get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; - clock_comparator_max = -1ULL >> 1; - system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); - } - if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; - /* the control bit is set during PCI initialization */ - } - if (test_facility(194)) - get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; - if (test_facility(85)) - get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN; -} - static inline void save_vector_registers(void) { #ifdef CONFIG_CRASH_DUMP - if (test_facility(129)) + if (cpu_has_vx()) save_vx_regs(boot_cpu_vector_save_area); #endif } @@ -307,17 +227,13 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { kasan_early_init(); - reset_tod_clock(); time_early_init(); init_kernel_storage_key(); lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - detect_machine_type(); setup_arch_string(); setup_boot_command_line(); - detect_diag9c(); - detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1ff13239d4e5..b7f1553d9ee5 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,6 +29,7 @@ #include <asm/nmi.h> #include <asm/nospec-insn.h> #include <asm/lowcore.h> +#include <asm/machine.h> _LPP_OFFSET = __LC_LPP @@ -44,7 +45,7 @@ 
_LPP_OFFSET = __LC_LPP ALTERNATIVE_2 "b \lpswe;nopr", \ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro MBEAR reg, lowcore @@ -52,16 +53,7 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea, lowcore -#ifdef CONFIG_CHECK_STACK - tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - la %r14,\savearea(\lowcore) - jz stack_overflow -#endif - .endm - .macro CHECK_VMAP_STACK savearea, lowcore, oklabel -#ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET @@ -76,10 +68,7 @@ _LPP_OFFSET = __LC_LPP clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel la %r14,\savearea(\lowcore) - j stack_overflow -#else - j \oklabel -#endif + j stack_invalid .endm /* @@ -127,7 +116,7 @@ _LPP_OFFSET = __LC_LPP .macro SIEEXIT sie_control,lowcore lg %r9,\sie_control # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce lg %r9,__LC_CURRENT(\lowcore) mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit @@ -135,7 +124,7 @@ _LPP_OFFSET = __LC_LPP #endif .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE brasl %r14,stackleak_erase_on_task_stack #endif .endm @@ -173,9 +162,13 @@ SYM_FUNC_START(__switch_to_asm) stg %r3,__LC_CURRENT(%r13) # store task struct of next stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next - aghi %r3,__TASK_pid - mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next + lay %r4,__TASK_pid(%r3) + mvc __LC_CURRENT_PID(4,%r13),0(%r4) # store pid of next ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) +#ifdef CONFIG_STACKPROTECTOR + lg %r3,__TASK_stack_canary(%r3) + stg %r3,__LC_STACK_CANARY(%r13) +#endif lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task BR_EX %r14 SYM_FUNC_END(__switch_to_asm) @@ -200,6 +193,7 @@ SYM_FUNC_START(__sie64a) mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 mvi __TI_sie(%r14),1 + stosm __SF_SIE_IRQ(%r15),0x03 # enable interrupts lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now @@ -219,10 +213,11 @@ SYM_FUNC_START(__sie64a) lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE GET_LC %r14 - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce lg %r14,__LC_CURRENT(%r14) mvi __TI_sie(%r14),0 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) + stnsm __SF_SIE_IRQ(%r15),0xfc # disable interrupts lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 xgr %r0,%r0 # clear guest registers to @@ -251,7 +246,6 @@ SYM_CODE_START(system_call) lghi %r14,0 .Lsysc_per: STBEAR __LC_LAST_BREAK(%r13) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -272,7 +266,6 @@ SYM_CODE_START(system_call) lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR 
STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -289,7 +282,6 @@ SYM_CODE_START(ret_from_fork) brasl %r14,__ret_from_fork STACKLEAK_ERASE GET_LC %r13 - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -310,10 +302,7 @@ SYM_CODE_START(pgm_check_handler) lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) xgr %r10,%r10 tmhh %r8,0x0001 # coming from user space? - jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - j 3f # -> fault in user space -.Lpgm_skip_asce: + jo 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) lg %r11,__LC_CURRENT(%r13) tm __TI_sie(%r11),0xff @@ -326,9 +315,8 @@ SYM_CODE_START(pgm_check_handler) jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + # CHECK_VMAP_STACK branches to stack_invalid or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -352,7 +340,6 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: @@ -394,11 +381,9 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - lg %r15,__LC_KERNEL_STACK(%r13) +1: lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -421,7 +406,6 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) @@ -430,9 +414,13 @@ SYM_CODE_START(\name) SYM_CODE_END(\name) .endm + .section .irqentry.text, "ax" + INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq + .section .kprobes.text, "ax" + /* * Machine check handler routines */ @@ -477,7 +465,7 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - lg %r10,__LC_PCPU + lg %r10,__LC_PCPU(%r13) oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 @@ -485,8 +473,6 @@ SYM_CODE_START(mcck_int_handler) .Lmcck_user: lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) - stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lay %r14,__LC_GPREGS_SAVE_AREA(%r13) mvc __PT_R0(128,%r11),0(%r14) @@ -504,7 +490,6 @@ SYM_CODE_START(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? 
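(Editorial aside.) The recurring "tmhh %r8,0x0001" tests in these entry paths check PSW bit 15, the problem-state bit, which tells an interruption taken in user space apart from one taken in the kernel. A sketch of the equivalent C-level test; this mirrors what user_mode() boils down to on s390, with PSW_MASK_PSTATE as defined in arch/s390/include/asm/ptrace.h:

#include <stdbool.h>

#define PSW_MASK_PSTATE	0x0001000000000000UL

static inline bool psw_from_user(unsigned long psw_mask)
{
	return (psw_mask & PSW_MASK_PSTATE) != 0;
}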
@@ -599,25 +584,24 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" -#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. */ -SYM_CODE_START(stack_overflow) +SYM_CODE_START(stack_invalid) GET_LC %r15 lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -SYM_CODE_END(stack_overflow) -#endif + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) .section .data, "aw" .balign 4 @@ -628,20 +612,3 @@ SYM_DATA_START_LOCAL(daton_psw) .quad PSW_KERNEL_BITS .quad .Ldaton SYM_DATA_END(daton_psw) - - .section .rodata, "a" - .balign 8 -#define SYSCALL(esame,emu) .quad __s390x_ ## esame -SYM_DATA_START(sys_call_table) -#include "asm/syscall_table.h" -SYM_DATA_END(sys_call_table) -#undef SYSCALL - -#ifdef CONFIG_COMPAT - -#define SYSCALL(esame,emu) .quad __s390_ ## emu -SYM_DATA_START(sys_call_table_emu) -#include "asm/syscall_table.h" -SYM_DATA_END(sys_call_table_emu) -#undef SYSCALL -#endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 21969520f947..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void kernel_stack_overflow(struct pt_regs * regs); +void kernel_stack_invalid(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); @@ -41,7 +41,6 @@ void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c index f02127219a27..d028b0be5c1d 100644 --- a/arch/s390/kernel/facility.c +++ b/arch/s390/kernel/facility.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 2023 */ +#include <linux/export.h> #include <asm/facility.h> unsigned int stfle_size(void) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 6f2e87920288..03a8973aec3c 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -5,6 +5,8 @@ * Copyright IBM Corp. 
2015 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ + +#include <linux/export.h> #include <linux/kernel.h> #include <linux/cpu.h> #include <linux/sched.h> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 51439a71e392..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/kmsan-checks.h> +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/execmem.h> #include <trace/syscall.h> @@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return !MACHINE_HAS_SEQ_INSN; + return !cpu_has_seq_insn(); } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, old_addr, addr); else return ftrace_modify_trampoline_call(rec, old_addr, addr); @@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */ - if (MACHINE_HAS_SEQ_INSN) + /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, addr, 0); else return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); @@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, 0, addr); else return ftrace_make_trampoline_call(rec, addr); @@ -261,43 +262,19 @@ void ftrace_arch_code_modify_post_process(void) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * Hook the return address and push it in the stack of return addresses - * in current thread info. - */ -unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp, - unsigned long ip) -{ - if (unlikely(ftrace_graph_is_dead())) - goto out; - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - goto out; - ip -= MCOUNT_INSN_SIZE; - if (!function_graph_enter(ra, ip, 0, (void *) sp)) - ra = (unsigned long) return_to_handler; -out: - return ra; -} -NOKPROBE_SYMBOL(prepare_ftrace_return); -/* - * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative on condition. To enable the ftrace graph code - * block, we simply patch the mask field of the instruction to zero and - * turn the instruction into a nop. - * To disable the ftrace graph code the mask field will be patched to - * all ones, which turns the instruction into an unconditional branch. - */ -int ftrace_enable_ftrace_graph_caller(void) +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - /* Expect brc 0xf,... */ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); -} + unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; -int ftrace_disable_ftrace_graph_caller(void) -{ - /* Expect brc 0x0,... 
*/ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) + *parent = (unsigned long)&return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index 0b68168d9566..cf26d7a37425 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -109,7 +110,7 @@ static int gs_broadcast(void) SYSCALL_DEFINE2(s390_guarded_storage, int, command, struct gs_cb __user *, gs_cb) { - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -EOPNOTSUPP; switch (command) { case GS_ENABLE: diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head.S index 396034b2fe67..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head.S @@ -18,12 +18,10 @@ __HEAD SYM_CODE_START(startup_continue) - larl %r1,tod_clock_base - GET_LC %r2 - mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # + GET_LC %r2 larl %r14,init_task stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 2a99a216ab62..217206522266 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -3,8 +3,7 @@ * Copyright IBM Corp. 2024 */ -#define KMSG_COMPONENT "hd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hd: " fmt /* * Hiperdispatch: @@ -45,6 +44,7 @@ * therefore delaying the throughput loss caused by using SMP threads. 
*/ +#include <linux/cpufeature.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/device.h> @@ -64,7 +64,7 @@ #define HD_DELAY_FACTOR (4) #define HD_DELAY_INTERVAL (HZ / 4) -#define HD_STEAL_THRESHOLD 30 +#define HD_STEAL_THRESHOLD 10 #define HD_STEAL_AVG_WEIGHT 16 static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */ @@ -87,7 +87,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); static int hd_set_hiperdispatch_mode(int enable) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) enable = 0; if (hd_enabled == enable) return 0; @@ -190,7 +190,7 @@ int hd_enable_hiperdispatch(void) return 0; if (hd_online_cores <= hd_entitled_cores) return 0; - mod_delayed_work(system_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor); + mod_delayed_work(system_dfl_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor); hd_update_capacities(); return 1; } @@ -292,7 +292,7 @@ static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table hiperdispatch_ctl_table[] = { +static const struct ctl_table hiperdispatch_ctl_table[] = { { .procname = "hiperdispatch", .mode = 0644, diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index edbb52ce3f1e..961a3d60a4dd 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -22,6 +22,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> @@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr) r1.even = addr; r1.odd = 0; - asm volatile( + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -269,8 +270,8 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ { \ if (len >= sizeof(_value)) \ return -E2BIG; \ - len = strscpy(_value, buf, sizeof(_value)); \ - if (len < 0) \ + len = strscpy(_value, buf); \ + if ((ssize_t)len < 0) \ return len; \ strim(_value); \ return len; \ @@ -280,58 +281,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) -#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t size = _ipl_block.scp_data_len; \ - void *scp_data = _ipl_block.scp_data; \ - \ - return memory_read_from_buffer(buf, count, &off, \ - scp_data, size); \ +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ } #define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t scpdata_len = count; \ - size_t padding; \ - \ - if (off) \ - return -EINVAL; \ - \ - memcpy(_ipl_block.scp_data, buf, count); \ - if (scpdata_len % 8) { \ - padding = 8 - (scpdata_len % 8); \ - memset(_ipl_block.scp_data 
+ scpdata_len, \ - 0, padding); \ - scpdata_len += padding; \ - } \ - \ - _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ - _ipl_block.len = _ipl_bp0_len + scpdata_len; \ - _ipl_block.scp_data_len = scpdata_len; \ - \ - return count; \ +static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ } #define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ NULL, _size) #define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ sys_##_prefix##_scp_data_store, _size) @@ -434,19 +435,19 @@ static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, &ipl_block, ipl_block.hdr.len); } -static struct bin_attribute sys_ipl_parameter_attr = +static const struct bin_attribute sys_ipl_parameter_attr = __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, PAGE_SIZE); DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static struct bin_attribute *ipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_fcp_scp_data_attr, NULL, @@ -454,7 +455,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute *ipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_nvme_scp_data_attr, NULL, @@ -462,7 +463,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct bin_attribute *ipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_eckd_scp_data_attr, NULL, @@ -593,7 +594,7 @@ static struct attribute *ipl_fcp_attrs[] = { NULL, }; -static struct attribute_group ipl_fcp_attr_group = { +static const struct attribute_group ipl_fcp_attr_group = { .attrs = ipl_fcp_attrs, .bin_attrs = ipl_fcp_bin_attrs, }; @@ -607,7 +608,7 @@ static struct attribute *ipl_nvme_attrs[] = { NULL, }; -static struct attribute_group ipl_nvme_attr_group = { +static 
const struct attribute_group ipl_nvme_attr_group = { .attrs = ipl_nvme_attrs, .bin_attrs = ipl_nvme_bin_attrs, }; @@ -620,7 +621,7 @@ static struct attribute *ipl_eckd_attrs[] = { NULL, }; -static struct attribute_group ipl_eckd_attr_group = { +static const struct attribute_group ipl_eckd_attr_group = { .attrs = ipl_eckd_attrs, .bin_attrs = ipl_eckd_bin_attrs, }; @@ -640,11 +641,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { +static const struct attribute_group ipl_ccw_attr_group_vm = { .attrs = ipl_ccw_attrs_vm, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { +static const struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; @@ -655,7 +656,7 @@ static struct attribute *ipl_common_attrs[] = { NULL, }; -static struct attribute_group ipl_common_attr_group = { +static const struct attribute_group ipl_common_attr_group = { .attrs = ipl_common_attrs, }; @@ -685,7 +686,7 @@ static int __init ipl_init(void) goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else @@ -808,7 +809,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, NULL, }; @@ -917,7 +918,7 @@ static struct attribute *reipl_fcp_attrs[] = { NULL, }; -static struct attribute_group reipl_fcp_attr_group = { +static const struct attribute_group reipl_fcp_attr_group = { .attrs = reipl_fcp_attrs, .bin_attrs = reipl_fcp_bin_attrs, }; @@ -932,7 +933,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { &sys_reipl_nvme_scp_data_attr, NULL, }; @@ -955,7 +956,7 @@ static struct attribute *reipl_nvme_attrs[] = { NULL, }; -static struct attribute_group reipl_nvme_attr_group = { +static const struct attribute_group reipl_nvme_attr_group = { .attrs = reipl_nvme_attrs, .bin_attrs = reipl_nvme_bin_attrs }; @@ -1031,7 +1032,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { &sys_reipl_eckd_scp_data_attr, NULL, }; @@ -1048,7 +1049,7 @@ static struct attribute *reipl_eckd_attrs[] = { NULL, }; -static struct attribute_group reipl_eckd_attr_group = { +static const struct attribute_group reipl_eckd_attr_group = { .attrs = reipl_eckd_attrs, .bin_attrs = reipl_eckd_bin_attrs }; @@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && ipl_block_valid && + if (machine_is_vm() && ipl_block_valid && (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; @@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); @@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void) return -ENOMEM; rc = sysfs_create_group(&reipl_kset->kobj, - MACHINE_IS_VM ? 
&reipl_ccw_attr_group_vm - : &reipl_ccw_attr_group_lpar); + machine_is_vm() ? &reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; @@ -1587,12 +1588,12 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct bin_attribute *dump_fcp_bin_attrs[] = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { &sys_dump_fcp_scp_data_attr, NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, .bin_attrs = dump_fcp_bin_attrs, @@ -1621,12 +1622,12 @@ static struct attribute *dump_nvme_attrs[] = { NULL, }; -static struct bin_attribute *dump_nvme_bin_attrs[] = { +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { &sys_dump_nvme_scp_data_attr, NULL, }; -static struct attribute_group dump_nvme_attr_group = { +static const struct attribute_group dump_nvme_attr_group = { .name = IPL_NVME_STR, .attrs = dump_nvme_attrs, .bin_attrs = dump_nvme_bin_attrs, @@ -1655,12 +1656,12 @@ static struct attribute *dump_eckd_attrs[] = { NULL, }; -static struct bin_attribute *dump_eckd_bin_attrs[] = { +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { &sys_dump_eckd_scp_data_attr, NULL, }; -static struct attribute_group dump_eckd_attr_group = { +static const struct attribute_group dump_eckd_attr_group = { .name = IPL_ECKD_STR, .attrs = dump_eckd_attrs, .bin_attrs = dump_eckd_bin_attrs, @@ -1987,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -2248,26 +2249,28 @@ static int __init s390_ipl_init(void) __initcall(s390_ipl_init); -static void __init strncpy_skip_quote(char *dst, char *src, int n) +static void __init strscpy_skip_quote(char *dst, char *src, int n) { int sx, dx; - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { + if (!n) + return; + for (sx = 0, dx = 0; src[sx]; sx++) { if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) + dst[dx] = src[sx]; + if (dx + 1 == n) break; + dx++; } + dst[dx] = '\0'; } static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE); - vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot)); on_reboot_trigger.action = &vmcmd_action; return 1; } @@ -2275,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE); - vmcmd_on_panic[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic)); on_panic_trigger.action = &vmcmd_action; return 1; } @@ -2286,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE); - vmcmd_on_halt[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt)); on_halt_trigger.action = &vmcmd_action; return 1; } @@ -2297,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_poff, str, 
VMCMD_MAX_SIZE); - vmcmd_on_poff[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff)); on_poff_trigger.action = &vmcmd_action; return 1; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -25,6 +26,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> @@ -84,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, @@ -149,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -164,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); - } while (MACHINE_IS_LPAR && irq_pending(regs)); + } while (machine_is_lpar() && irq_pending(regs)); irq_exit_rcu(); @@ -185,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 4d364de43799..143e34a4eca5 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -16,7 +16,7 @@ static int kexec_file_add_kernel_elf(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; Elf_Addr entry; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index a32ce8bea745..9a439175723c 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -16,7 +16,7 @@ static int kexec_file_add_kernel_image(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; buf.image = image; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6295faf0987d..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -13,6 +13,7 @@ #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/extable.h> @@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -489,6 +490,12 @@ int __init 
arch_init_kprobes(void) return 0; } +int __init arch_populate_kprobe_blacklist(void) +{ + return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); +} + int arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 6652e54cf3db..6d1ffca5f798 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -166,7 +166,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); + mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS)); } /* diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8f681ccfb83a..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,7 +13,9 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/cpufeature.h> #include <asm/guarded_storage.h> +#include <asm/machine.h> #include <asm/pfault.h> #include <asm/cio.h> #include <asm/fpu.h> @@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image) mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; @@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void) static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if (MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index c2bac14dd668..a36d7311c668 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -129,7 +129,7 @@ static int kexec_file_update_purgatory(struct kimage *image, static int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; int ret; buf.image = image; @@ -152,7 +152,7 @@ static int kexec_file_add_purgatory(struct kimage *image, static int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; int ret; buf.image = image; @@ -184,7 +184,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, { __u32 *lc_ipl_parmblock_ptr; unsigned int len, ncerts; - struct kexec_buf buf; + struct kexec_buf buf = {}; unsigned long addr; void *ptr, *end; int ret; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7e267ef63a7f..1fec370fecf4 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -104,17 +104,6 @@ SYM_CODE_START(ftrace_common) lgr %r3,%r14 la %r5,STACK_FREGS(%r15) BASR_EX %r14,%r1 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -# The j instruction gets runtime patched to a nop instruction. -# See ftrace_enable_ftrace_graph_caller. 
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) - j .Lftrace_graph_caller_end - lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) - lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) -.Lftrace_graph_caller_end: -#endif lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) #ifdef MARCH_HAS_Z196_FEATURES ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) @@ -134,14 +123,14 @@ SYM_CODE_END(ftrace_common) SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) + # allocate ftrace_regs and stack frame for ftrace_return_to_handler + aghi %r15,-STACK_FRAME_SIZE_FREGS stg %r1,__SF_BACKCHAIN(%r15) - la %r3,STACK_FRAME_OVERHEAD(%r15) - stg %r1,__FGRAPH_RET_FP(%r3) - stg %r2,__FGRAPH_RET_GPR2(%r3) - lgr %r2,%r3 + stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + la %r2,STACK_FRAME_OVERHEAD(%r15) brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE + aghi %r15,STACK_FRAME_SIZE_FREGS lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 91e207b50394..9d1f8a50f5a4 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -22,12 +22,14 @@ #include <linux/bug.h> #include <linux/memory.h> #include <linux/execmem.h> +#include <asm/arch-stackprotector.h> #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> #include <asm/ftrace.lds.h> #include <asm/set_memory.h> #include <asm/setup.h> +#include <asm/asm-offsets.h> #if 0 #define DEBUGP printk @@ -495,9 +497,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings, *secname; void *aseg; -#ifdef CONFIG_FUNCTION_TRACER - int ret; -#endif + int rc = 0; if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable && me->arch.plt_size) { @@ -527,14 +527,21 @@ int module_finalize(const Elf_Ehdr *hdr, (str_has_prefix(secname, ".s390_return"))) nospec_revert(aseg, aseg + s->sh_size); + if (IS_ENABLED(CONFIG_STACKPROTECTOR) && + (str_has_prefix(secname, "__stack_protector_loc"))) { + rc = stack_protector_apply(aseg, aseg + s->sh_size); + if (rc) + break; + } + #ifdef CONFIG_FUNCTION_TRACER if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) { - ret = module_alloc_ftrace_hotpatch_trampolines(me, s); - if (ret < 0) - return ret; + rc = module_alloc_ftrace_hotpatch_trampolines(me, s); + if (rc) + break; } #endif /* CONFIG_FUNCTION_TRACER */ } - return 0; + return rc; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fbd218b6fc8e..a55abbf65333 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -9,6 +9,8 @@ */ #include <linux/kernel_stat.h> +#include <linux/utsname.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/entry-common.h> @@ -20,7 +22,6 @@ #include <linux/module.h> #include <linux/sched/signal.h> #include <linux/kvm_host.h> -#include <linux/export.h> #include <asm/lowcore.h> #include <asm/ctlreg.h> #include <asm/fpu.h> @@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); static inline int nmi_needs_mcesa(void) { - return cpu_has_vx() || MACHINE_HAS_GS; + return cpu_has_vx() || cpu_has_gs(); } /* @@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) if (!nmi_needs_mcesa()) return; *mcesad = __pa(&boot_mcesa); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); } @@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad) 
*mcesad = 0; if (!nmi_needs_mcesa()) return 0; - size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ kmemleak_not_leak(origin); *mcesad = __pa(origin); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } @@ -115,18 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val) return dest; } -static notrace void s390_handle_damage(void) +static notrace void nmi_print_info(void) { struct lowcore *lc = get_lowcore(); - union ctlreg0 cr0, cr0_new; char message[100]; - psw_t psw_save; char *ptr; + int i; + + ptr = nmi_puts(message, "Unrecoverable machine check, code: "); + ptr = u64_to_hex(ptr, lc->mcck_interruption_code); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, init_utsname()->release); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, arch_hw_string); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "PSW: "); + ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask); + ptr = nmi_puts(ptr, " "); + ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr); + ptr = nmi_puts(ptr, " PFX: "); + ptr = u64_to_hex(ptr, (u64)get_lowcore()); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "LBA: "); + ptr = u64_to_hex(ptr, lc->last_break_save_area); + ptr = nmi_puts(ptr, " EDC: "); + ptr = u64_to_hex(ptr, lc->external_damage_code); + ptr = nmi_puts(ptr, " FSA: "); + ptr = u64_to_hex(ptr, lc->failing_storage_address); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "CRS:\n"); + sclp_emergency_printk(message); + ptr = message; + for (i = 0; i < 16; i++) { + ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val); + ptr = nmi_puts(ptr, " "); + if ((i + 1) % 4 == 0) { + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + ptr = message; + } + } + + ptr = nmi_puts(message, "GPRS:\n"); + sclp_emergency_printk(message); + ptr = message; + for (i = 0; i < 16; i++) { + ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]); + ptr = nmi_puts(ptr, " "); + if ((i + 1) % 4 == 0) { + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + ptr = message; + } + } + + ptr = nmi_puts(message, "System stopped\n"); + sclp_emergency_printk(message); +} + +static notrace void __noreturn s390_handle_damage(void) +{ + struct lowcore *lc = get_lowcore(); + union ctlreg0 cr0, cr0_new; + psw_t psw_save; smp_emergency_stop(); diag_amode31_ops.diag308_reset(); - ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); - u64_to_hex(ptr, lc->mcck_interruption_code); /* * Disable low address protection and make machine check new PSW a @@ -140,7 +205,7 @@ static notrace void s390_handle_damage(void) psw_bits(lc->mcck_new_psw).io = 0; psw_bits(lc->mcck_new_psw).ext = 0; psw_bits(lc->mcck_new_psw).wait = 1; - sclp_emergency_printk(message); + nmi_print_info(); /* * Restore machine check new PSW and control register 0 to original @@ -149,7 +214,6 @@ static notrace void s390_handle_damage(void) lc->mcck_new_psw = psw_save; local_ctl_load(0, &cr0.reg); disabled_wait(); - while (1); } NOKPROBE_SYMBOL(s390_handle_damage); diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c index ddc1448ea2e1..2fc40f97c0ad 100644 --- a/arch/s390/kernel/numa.c +++ b/arch/s390/kernel/numa.c @@ -21,12 +21,8 
@@ void __init numa_setup(void)
 	nodes_clear(node_possible_map);
 	node_set(0, node_possible_map);
 	node_set_online(0);
-	for (nid = 0; nid < MAX_NUMNODES; nid++) {
-		NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
-		if (!NODE_DATA(nid))
-			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-			      __func__, sizeof(pg_data_t), 8);
-	}
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
 	NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	NODE_DATA(0)->node_id = 0;
 }
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 29080d6d5d8d..94fa44776d0c 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -6,8 +6,7 @@
  * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
  */
-#define KMSG_COMPONENT "os_info"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "os_info: " fmt
 
 #include <linux/crash_dump.h>
 #include <linux/kernel.h>
@@ -18,6 +17,7 @@
 #include <asm/physmem_info.h>
 #include <asm/maccess.h>
 #include <asm/asm-offsets.h>
+#include <asm/sections.h>
 #include <asm/ipl.h>
 
 /*
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index b0bc68da6a11..408ab93112bf 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -6,15 +6,13 @@
  * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
  *	      Thomas Richter <tmricht@linux.ibm.com>
  */
-#define KMSG_COMPONENT "cpum_cf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpum_cf: " fmt
 
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
-#include <linux/export.h>
 #include <linux/miscdevice.h>
 #include <linux/perf_event.h>
 
@@ -442,7 +440,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
 			ctrset_size = 48;
 		else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
 			ctrset_size = 128;
-		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+		else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
 			ctrset_size = 160;
 		break;
 	case CPUMF_CTR_SET_MT_DIAG:
@@ -761,8 +759,6 @@
 		break;
 
 	case PERF_TYPE_HARDWARE:
-		if (is_sampling_event(event))	/* No sampling support */
-			return -ENOENT;
 		ev = attr->config;
 		if (!attr->exclude_user && attr->exclude_kernel) {
 			/*
@@ -858,18 +854,15 @@ static int cpumf_pmu_event_type(struct perf_event *event)
 static int cpumf_pmu_event_init(struct perf_event *event)
 {
 	unsigned int type = event->attr.type;
-	int err;
+	int err = -ENOENT;
 
+	if (is_sampling_event(event))	/* No sampling support */
+		return err;
 	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
 		err = __hw_perf_event_init(event, type);
 	else if (event->pmu->type == type)
 		/* Registered as unknown PMU */
 		err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
-	else
-		return -ENOENT;
-
-	if (unlikely(err) && event->destroy)
-		event->destroy(event);
 
 	return err;
 }
@@ -981,12 +974,10 @@ static int cfdiag_push_sample(struct perf_event *event,
 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 		raw.frag.size = cpuhw->usedss;
 		raw.frag.data = cpuhw->stop;
-		perf_sample_save_raw_data(&data, &raw);
+		perf_sample_save_raw_data(&data, event, &raw);
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
-	if (overflow)
-		event->pmu->stop(event, 0);
 
 	perf_event_update_userpage(event);
 	return overflow;
@@ -1214,7 +1205,7 @@ static int __init cpumf_pmu_init(void)
 	}
 
 	/* Setup s390dbf facility */
- cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); + cf_dbg = debug_register("cpum_cf", 2, 1, 128); if (!cf_dbg) { pr_err("Registration of s390dbf(cpum_cf) failed\n"); rc = -ENOMEM; @@ -1697,7 +1688,6 @@ static const struct file_operations cfset_fops = { .open = cfset_open, .release = cfset_release, .unlocked_ioctl = cfset_ioctl, - .compat_ioctl = cfset_ioctl, }; static struct miscdevice cfset_dev = { @@ -1819,8 +1809,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc91080..7ace1f9e4ccf 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, 
DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, 
DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 
5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0cde42f8af6e..459af23a47a5 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -5,8 +5,7 @@ * Copyright IBM Corp. 2013, 2018 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "cpum_sf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpum_sf: " fmt #include <linux/kernel.h> #include <linux/kernel_stat.h> @@ -14,7 +13,6 @@ #include <linux/percpu.h> #include <linux/pid.h> #include <linux/notifier.h> -#include <linux/export.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/moduleparam.h> @@ -180,39 +178,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long *sdbt, *curr; - - if (!sfb->sdbt) - return; + unsigned long *sdbt, *curr, *head; sdbt = sfb->sdbt; - curr = sdbt; - + if (!sdbt) + return; + sfb->sdbt = NULL; /* Free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* Process table-link entries */ + head = sdbt; + curr = sdbt; + do { if (is_link_entry(curr)) { + /* Process table-link entries */ curr = get_next_sdbt(curr); - if (sdbt) - free_page((unsigned long)sdbt); - - /* If the origin is reached, sampling buffer is freed */ - if (curr == sfb->sdbt) - break; - else - sdbt = curr; + free_page((unsigned long)sdbt); + sdbt = curr; } else { /* Process SDB pointer */ - if (*curr) { - free_page((unsigned long)phys_to_virt(*curr)); - curr++; - } + free_page((unsigned long)phys_to_virt(*curr)); + curr++; } - } - + } while (curr != head); memset(sfb, 0, sizeof(*sfb)); } @@ -897,9 +883,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } @@ -993,7 +976,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -/* perf_exclude_event() - Filter event +/* perf_event_exclude() - Filter event * @event: The perf event * @regs: pt_regs structure * @sde_regs: Sample-data-entry (sde) regs structure @@ -1002,7 +985,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) * * Return non-zero if the event shall be excluded. 
 */
-static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
 			      struct perf_sf_sde_regs *sde_regs)
 {
 	if (event->attr.exclude_user && user_mode(regs))
@@ -1085,12 +1068,9 @@
 	data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
 
 	overflow = 0;
-	if (perf_exclude_event(event, &regs, sde_regs))
+	if (perf_event_exclude(event, &regs, sde_regs))
 		goto out;
-	if (perf_event_overflow(event, &data, &regs)) {
-		overflow = 1;
-		event->pmu->stop(event, 0);
-	}
+	overflow = perf_event_overflow(event, &data, &regs);
 	perf_event_update_userpage(event);
 out:
 	return overflow;
@@ -1112,7 +1092,7 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
  * combined-sampling data entry consists of a basic- and a diagnostic-sampling
  * data entry. The sampling function is determined by the flags in the perf
  * event hardware structure. The function always works with a combined-sampling
- * data entry but ignores the the diagnostic portion if it is not available.
+ * data entry but ignores the diagnostic portion if it is not available.
  *
  * Note that the implementation focuses on basic-sampling data entries and, if
  * such an entry is not valid, the entire combined-sampling data entry is
@@ -2089,7 +2069,7 @@ static int __init init_cpum_sampling_pmu(void)
 		CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
 	}
 
-	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+	sfdbg = debug_register("cpum_sf", 2, 1, 80);
 	if (!sfdbg) {
 		pr_err("Registering for s390dbf failed\n");
 		return -ENOMEM;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2b9611c4718e..606750bae508 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -5,18 +5,15 @@
  * Copyright IBM Corp. 2012, 2013
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
-#define KMSG_COMPONENT "perf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "perf: " fmt
 
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
-#include <linux/export.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
-#include <linux/compat.h>
 #include <linux/sysfs.h>
 #include <asm/stacktrace.h>
 #include <asm/irq.h>
diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c
new file mode 100644
index 000000000000..810f5b6c5e01
--- /dev/null
+++ b/arch/s390/kernel/perf_pai.c
@@ -0,0 +1,1230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ * Copyright IBM Corp. 2026
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define pr_fmt(fmt) "pai: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+#include <asm/ctlreg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *paidbg;
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+enum {
+	PAI_PMU_CRYPTO,		/* Index of PMU pai_crypto */
+	PAI_PMU_EXT,		/* Index of PMU pai_ext */
+	PAI_PMU_MAX		/* # of PAI PMUs */
+};
+
+enum {
+	PAIE1_CB_SZ = 0x200,		/* Size of PAIE1 control block */
+	PAIE1_CTRBLOCK_SZ = 0x400	/* Size of PAIE1 counter blocks */
+};
+
+struct pai_userdata {
+	u16 num;
+	u64 value;
+} __packed;
+
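Illustration, not part of the patch: because struct pai_userdata is __packed,
each raw-sample record is exactly 10 bytes (a 2 byte counter number followed
by an 8 byte value). That is why the save buffer allocated later in
pai_alloc_cpu() holds num_avail + 1 such records and why pai_copy() reports
its size as outidx * sizeof(*userdata). A hypothetical compile-time check of
that layout could look like this:

	#include <linux/build_bug.h>

	/* One raw record: 2 byte counter number + 8 byte value, no padding */
	static_assert(sizeof(struct pai_userdata) == sizeof(u16) + sizeof(u64),
		      "pai_userdata must pack to 10 bytes");
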
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb {		/* PAI extension 1 control block */
+	u64 header;		/* Not used */
+	u64 reserved1;
+	u64 acc;		/* Addr to analytics counter control block */
+	u8 reserved2[PAIE1_CB_SZ - 3 * sizeof(u64)];
+} __packed;
+
+struct pai_map {
+	unsigned long *area;		/* Area for CPU to store counters */
+	struct pai_userdata *save;	/* Page to store non-zero counters */
+	unsigned int active_events;	/* # of PAI crypto users */
+	refcount_t refcnt;		/* Reference count mapped buffers */
+	struct perf_event *event;	/* Perf event for sampling */
+	struct list_head syswide_list;	/* List system-wide sampling events */
+	struct paiext_cb *paiext_cb;	/* PAI extension control block area */
+	bool fullpage;			/* True: counter area is a full page */
+};
+
+struct pai_mapptr {
+	struct pai_map *mapptr;
+};
+
+static struct pai_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct pai_mapptr __percpu *mapptr;
+} pai_root[PAI_PMU_MAX];
+
+/* This table defines the different parameters of the PAI PMUs. During
+ * initialization the machine dependent values are extracted and saved.
+ * However, most of the values are static and do not change.
+ * There is one table entry per PAI PMU.
+ */
+struct pai_pmu {			/* Define PAI PMU characteristics */
+	const char *pmuname;		/* Name of PMU */
+	const int facility_nr;		/* Facility number to check for support */
+	unsigned int num_avail;		/* # Counters defined by hardware */
+	unsigned int num_named;		/* # Counters known by name */
+	unsigned long base;		/* Counter set base number */
+	unsigned long kernel_offset;	/* Offset to kernel part in counter page */
+	unsigned long area_size;	/* Size of counter area */
+	const char * const *names;	/* List of counter names */
+	struct pmu *pmu;		/* Ptr to supporting PMU */
+	int (*init)(struct pai_pmu *p);	/* PMU support init function */
+	void (*exit)(struct pai_pmu *p);	/* PMU support exit function */
+	struct attribute_group *event_group;	/* Ptr to attribute of events */
+};
+
+static struct pai_pmu pai_pmu[];	/* Forward declaration */
+
+/* Free per CPU data when the last event is removed. */
+static void pai_root_free(int idx)
+{
+	if (refcount_dec_and_test(&pai_root[idx].refcnt)) {
+		free_percpu(pai_root[idx].mapptr);
+		pai_root[idx].mapptr = NULL;
+	}
+	debug_sprintf_event(paidbg, 5, "%s root[%d].refcount %d\n", __func__,
+			    idx, refcount_read(&pai_root[idx].refcnt));
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int pai_root_alloc(int idx)
+{
+	if (!refcount_inc_not_zero(&pai_root[idx].refcnt)) {
+		/* The memory is already zeroed. */
+		pai_root[idx].mapptr = alloc_percpu(struct pai_mapptr);
+		if (!pai_root[idx].mapptr)
+			return -ENOMEM;
+		refcount_set(&pai_root[idx].refcnt, 1);
+	}
+	return 0;
+}
+
+/* Serialize allocation and release of the per CPU PMU data */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void pai_free(struct pai_mapptr *mp)
+{
+	if (mp->mapptr->fullpage)
+		free_page((unsigned long)mp->mapptr->area);
+	else
+		kfree(mp->mapptr->area);
+	kfree(mp->mapptr->paiext_cb);
+	kvfree(mp->mapptr->save);
+	kfree(mp->mapptr);
+	mp->mapptr = NULL;
+}
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void pai_event_destroy_cpu(struct perf_event *event, int cpu)
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_mapptr *mp = per_cpu_ptr(pai_root[idx].mapptr, cpu);
+	struct pai_map *cpump = mp->mapptr;
+
+	mutex_lock(&pai_reserve_mutex);
+	debug_sprintf_event(paidbg, 5, "%s event %#llx idx %d cpu %d users %d "
+			    "refcnt %u\n", __func__, event->attr.config, idx,
+			    event->cpu, cpump->active_events,
+			    refcount_read(&cpump->refcnt));
+	if (refcount_dec_and_test(&cpump->refcnt))
+		pai_free(mp);
+	pai_root_free(idx);
+	mutex_unlock(&pai_reserve_mutex);
+}
+
+static void pai_event_destroy(struct perf_event *event)
+{
+	int cpu;
+
+	free_page(PAI_SAVE_AREA(event));
+	if (event->cpu == -1) {
+		struct cpumask *mask = PAI_CPU_MASK(event);
+
+		for_each_cpu(cpu, mask)
+			pai_event_destroy_cpu(event, cpu);
+		kfree(mask);
+	} else {
+		pai_event_destroy_cpu(event, event->cpu);
+	}
+}
+
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+	static_branch_dec(&pai_key);
+	pai_event_destroy(event);
+}
+
+static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset)
+{
+	if (offset)
+		nr += offset / sizeof(*page);
+	return page[nr];
+}
+
+/* Read the counter values. Return value from location in CMP. For the
+ * base event xxx_ALL, sum up all counters.
+ */
+static u64 pai_getdata(struct perf_event *event, bool kernel)
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+	struct pai_pmu *pp = &pai_pmu[idx];
+	struct pai_map *cpump = mp->mapptr;
+	unsigned int i;
+	u64 sum = 0;
+
+	if (event->attr.config != pp->base) {
+		return pai_getctr(cpump->area,
+				  event->attr.config - pp->base,
+				  kernel ? pp->kernel_offset : 0);
+	}
+
+	for (i = 1; i <= pp->num_avail; i++) {
+		u64 val = pai_getctr(cpump->area, i,
+				     kernel ? pp->kernel_offset : 0);
+
+		if (!val)
+			continue;
+		sum += val;
+	}
+	return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+	u64 sum = 0;
+
+	if (!event->attr.exclude_kernel)
+		sum += pai_getdata(event, true);
+	if (!event->attr.exclude_user)
+		sum += pai_getdata(event, false);
+
+	return sum;
+}
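
Illustration, not part of the patch: the crypto counter area keeps the
user-space counters at its start and a second, kernel-space copy of each
counter at a fixed byte offset into the same area; pai_getctr() turns that
byte offset into an array index. A stand-alone model of the lookup, where the
0x100 offset and the counter values are made-up numbers:

	#include <stdio.h>
	#include <stdint.h>

	#define KERNEL_OFFSET	0x100	/* made-up byte offset of the kernel half */

	/* Model of pai_getctr(): a byte offset selects the kernel counters */
	static uint64_t getctr(const uint64_t *area, int nr, unsigned long offset)
	{
		if (offset)
			nr += offset / sizeof(*area);
		return area[nr];
	}

	int main(void)
	{
		uint64_t area[64] = { 0 };

		area[7] = 5;				/* counter 7, user part */
		area[KERNEL_OFFSET / 8 + 7] = 3;	/* counter 7, kernel part */
		/* Summing both halves mirrors paicrypt_getall(): prints 8 */
		printf("%llu\n", (unsigned long long)
		       (getctr(area, 7, 0) + getctr(area, 7, KERNEL_OFFSET)));
		return 0;
	}
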
+/* Check concurrent access of counting and sampling for crypto events.
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int pai_alloc_cpu(struct perf_event *event, int cpu)
+{
+	int rc, idx = PAI_PMU_IDX(event);
+	struct pai_map *cpump = NULL;
+	bool need_paiext_cb = false;
+	struct pai_mapptr *mp;
+
+	mutex_lock(&pai_reserve_mutex);
+	/* Allocate root node */
+	rc = pai_root_alloc(idx);
+	if (rc)
+		goto unlock;
+
+	/* Allocate node for this event */
+	mp = per_cpu_ptr(pai_root[idx].mapptr, cpu);
+	cpump = mp->mapptr;
+	if (!cpump) {			/* pai_map allocated? */
+		rc = -ENOMEM;
+		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+		if (!cpump)
+			goto undo;
+		/* Allocate memory for counter page and counter extraction.
+		 * Only the first counting event has to allocate a page.
+		 */
+		mp->mapptr = cpump;
+		if (idx == PAI_PMU_CRYPTO) {
+			cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+			/* free_page() can handle 0x0 address */
+			cpump->fullpage = true;
+		} else {		/* PAI_PMU_EXT */
+			/*
+			 * Allocate memory for counter area and counter extraction.
+			 * These are
+			 * - a 512 byte block requiring 512 byte boundary
+			 *   alignment.
+			 * - a 1 KB block requiring 1 KB boundary
+			 *   alignment.
+			 * Only the first counting event has to allocate the area.
+			 *
+			 * Note: This works with commit 59bb47985c1d by default.
+			 * Backporting this to kernels without this commit might
+			 * need adjustment.
+			 */
+			cpump->area = kzalloc(pai_pmu[idx].area_size, GFP_KERNEL);
+			cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+			need_paiext_cb = true;
+		}
+		cpump->save = kvmalloc_array(pai_pmu[idx].num_avail + 1,
+					     sizeof(struct pai_userdata),
+					     GFP_KERNEL);
+		if (!cpump->area || !cpump->save ||
+		    (need_paiext_cb && !cpump->paiext_cb)) {
+			pai_free(mp);
+			goto undo;
+		}
+		INIT_LIST_HEAD(&cpump->syswide_list);
+		refcount_set(&cpump->refcnt, 1);
+		rc = 0;
+	} else {
+		refcount_inc(&cpump->refcnt);
+	}
+
+undo:
+	if (rc) {
+		/* Error in allocation of event, decrement anchor. Since
+		 * the event is not created, its destroy() function is never
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		pai_root_free(idx);
+	}
+unlock:
+	mutex_unlock(&pai_reserve_mutex);
+	/* If rc is non-zero, no increment of counter/sampler was done. */
+	return rc;
+}
+
+static int pai_alloc(struct perf_event *event)
+{
+	struct cpumask *maskptr;
+	int cpu, rc = -ENOMEM;
+
+	maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+	if (!maskptr)
+		goto out;
+
+	for_each_online_cpu(cpu) {
+		rc = pai_alloc_cpu(event, cpu);
+		if (rc) {
+			for_each_cpu(cpu, maskptr)
+				pai_event_destroy_cpu(event, cpu);
+			kfree(maskptr);
+			goto out;
+		}
+		cpumask_set_cpu(cpu, maskptr);
+	}
+
+	/*
+	 * On error, all cpumasks are freed and all events have been destroyed.
+	 * Save the mask of CPUs for which data structures have been allocated.
+	 * Release them in the pai_event_destroy() callback for this event.
+	 */
+	PAI_CPU_MASK(event) = maskptr;
+	rc = 0;
+out:
+	return rc;
+}
+
+/* Validate event number and return error if event is not supported.
+ * On successful return, PAI_PMU_IDX(event) is set to the index of
+ * the supporting pai_pmu[] array element.
+ */ +static int pai_event_valid(struct perf_event *event, int idx) +{ + struct perf_event_attr *a = &event->attr; + struct pai_pmu *pp = &pai_pmu[idx]; + + /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; + /* Allow only CRYPTO_ALL/NNPA_ALL for sampling */ + if (a->sample_period && a->config != pp->base) + return -EINVAL; + /* PAI crypto event must be in valid range, try others if not */ + if (a->config < pp->base || a->config > pp->base + pp->num_avail) + return -ENOENT; + if (idx == PAI_PMU_EXT && a->exclude_user) + return -EINVAL; + PAI_PMU_IDX(event) = idx; + return 0; +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int pai_event_init(struct perf_event *event, int idx) +{ + struct perf_event_attr *a = &event->attr; + int rc; + + /* PAI event must be valid and in supported range */ + rc = pai_event_valid(event, idx); + if (rc) + goto out; + /* Get a page to store last counter values for sampling */ + if (a->sample_period) { + PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); + if (!PAI_SAVE_AREA(event)) { + rc = -ENOMEM; + goto out; + } + } + + if (event->cpu >= 0) + rc = pai_alloc_cpu(event, event->cpu); + else + rc = pai_alloc(event); + if (rc) { + free_page(PAI_SAVE_AREA(event)); + goto out; + } + + if (a->sample_period) { + a->sample_period = 1; + a->freq = 0; + /* Register for paicrypt_sched_task() to be called */ + event->attach_state |= PERF_ATTACH_SCHED_CB; + /* Add raw data which contain the memory mapped counters */ + a->sample_type |= PERF_SAMPLE_RAW; + /* Turn off inheritance */ + a->inherit = 0; + } +out: + return rc; +} + +static int paicrypt_event_init(struct perf_event *event) +{ + int rc = pai_event_init(event, PAI_PMU_CRYPTO); + + if (!rc) { + event->destroy = paicrypt_event_destroy; + static_branch_inc(&pai_key); + } + return rc; +} + +static void pai_read(struct perf_event *event, + u64 (*fct)(struct perf_event *event)) +{ + u64 prev, new, delta; + + prev = local64_read(&event->hw.prev_count); + new = fct(event); + local64_set(&event->hw.prev_count, new); + delta = (prev <= new) ? 
new - prev : (-1ULL - prev) + new + 1;
+	local64_add(delta, &event->count);
+}
+
+static void paicrypt_read(struct perf_event *event)
+{
+	pai_read(event, paicrypt_getall);
+}
+
+static void pai_start(struct perf_event *event, int flags,
+		      u64 (*fct)(struct perf_event *event))
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_pmu *pp = &pai_pmu[idx];
+	struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+	struct pai_map *cpump = mp->mapptr;
+	u64 sum;
+
+	if (!event->attr.sample_period) {	/* Counting */
+		sum = fct(event);		/* Get current value */
+		local64_set(&event->hw.prev_count, sum);
+	} else {				/* Sampling */
+		memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
+		/* Enable context switch callback for system-wide sampling */
+		if (!(event->attach_state & PERF_ATTACH_TASK)) {
+			list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+			perf_sched_cb_inc(event->pmu);
+		} else {
+			cpump->event = event;
+		}
+	}
+}
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+	pai_start(event, flags, paicrypt_getall);
+}
+
+static int pai_add(struct perf_event *event, int flags)
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+	struct pai_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+	unsigned long ccd;
+
+	if (++cpump->active_events == 1) {
+		if (!pcb) {		/* PAI crypto */
+			ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET;
+			WRITE_ONCE(get_lowcore()->ccd, ccd);
+			local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+		} else {		/* PAI extension 1 */
+			ccd = virt_to_phys(pcb);
+			WRITE_ONCE(get_lowcore()->aicd, ccd);
+			pcb->acc = virt_to_phys(cpump->area) | 0x1;
+			/* Enable CPU instruction lookup for PAIE1 control block */
+			local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
+		}
+	}
+	if (flags & PERF_EF_START)
+		pai_pmu[idx].pmu->start(event, PERF_EF_RELOAD);
+	event->hw.state = 0;
+	return 0;
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+	return pai_add(event, flags);
+}
+
+static void pai_have_sample(struct perf_event *, struct pai_map *);
+static void pai_stop(struct perf_event *event, int flags)
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+	struct pai_map *cpump = mp->mapptr;
+
+	if (!event->attr.sample_period) {	/* Counting */
+		pai_pmu[idx].pmu->read(event);
+	} else {				/* Sampling */
+		if (!(event->attach_state & PERF_ATTACH_TASK)) {
+			perf_sched_cb_dec(event->pmu);
+			list_del(PAI_SWLIST(event));
+		} else {
+			pai_have_sample(event, cpump);
+			cpump->event = NULL;
+		}
+	}
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+	pai_stop(event, flags);
+}
+
+static void pai_del(struct perf_event *event, int flags)
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+	struct pai_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	pai_pmu[idx].pmu->stop(event, PERF_EF_UPDATE);
+	if (--cpump->active_events == 0) {
+		if (!pcb) {		/* PAI crypto */
+			local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+			WRITE_ONCE(get_lowcore()->ccd, 0);
+		} else {		/* PAI extension 1 */
+			/* Disable CPU instruction lookup for PAIE1 control block */
+			local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
+			pcb->acc = 0;
+			WRITE_ONCE(get_lowcore()->aicd, 0);
+		}
+	}
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+	pai_del(event, flags);
+}
+
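Illustration, not part of the patch: pai_read() above and pai_copy() below use
the same wraparound-safe delta idiom for free-running u64 counters; when the
new value is smaller than the previous one, the counter is assumed to have
wrapped exactly once. A stand-alone sketch of the arithmetic with made-up
values:

	#include <stdio.h>
	#include <stdint.h>

	/* (-1ULL - prev) + new + 1 equals new - prev modulo 2^64 */
	static uint64_t ctr_delta(uint64_t prev, uint64_t new)
	{
		return (prev <= new) ? new - prev : (-1ULL - prev) + new + 1;
	}

	int main(void)
	{
		/* Counter advanced from 100 to 250: prints 150 */
		printf("%llu\n", (unsigned long long)ctr_delta(100, 250));
		/* Counter wrapped from 2^64 - 10 to 5: prints 15 */
		printf("%llu\n", (unsigned long long)ctr_delta(-1ULL - 9, 5));
		return 0;
	}
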
+/* Create raw data and save it in buffer. Calculate the delta for each
+ * counter between this invocation and the last invocation.
+ * Returns number of bytes copied.
+ * Saves only entries with nonzero counter difference of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
+		       struct pai_pmu *pp, unsigned long *page_old,
+		       bool exclude_user, bool exclude_kernel)
+{
+	int i, outidx = 0;
+
+	for (i = 1; i <= pp->num_avail; i++) {
+		u64 val = 0, val_old = 0;
+
+		if (!exclude_kernel) {
+			val += pai_getctr(page, i, pp->kernel_offset);
+			val_old += pai_getctr(page_old, i, pp->kernel_offset);
+		}
+		if (!exclude_user) {
+			val += pai_getctr(page, i, 0);
+			val_old += pai_getctr(page_old, i, 0);
+		}
+		if (val >= val_old)
+			val -= val_old;
+		else
+			val = (~0ULL - val_old) + val + 1;
+		if (val) {
+			userdata[outidx].num = i;
+			userdata[outidx].value = val;
+			outidx++;
+		}
+	}
+	return outidx * sizeof(*userdata);
+}
+
+/* Write sample when one or more counter values are nonzero.
+ *
+ * Note: The functions paicrypt_sched_task() and pai_push_sample() are not
+ * invoked after function paicrypt_del() has been called because of function
+ * perf_sched_cb_dec(). Both functions are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paicrypt_sched_task() as callback
+ * to run at context switch time.
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed a
+ * sched_task() callback. That callback is not active after paicrypt_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
+			   struct perf_event *event)
+{
+	int idx = PAI_PMU_IDX(event);
+	struct pai_pmu *pp = &pai_pmu[idx];
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	int overflow;
+
+	/* Setup perf sample */
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+	memset(&data, 0, sizeof(data));
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	if (event->attr.sample_type & PERF_SAMPLE_TID) {
+		data.tid_entry.pid = task_tgid_nr(current);
+		data.tid_entry.tid = task_pid_nr(current);
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_TIME)
+		data.time = event->clock();
+	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+		data.id = event->id;
+	if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+		data.cpu_entry.cpu = smp_processor_id();
+		data.cpu_entry.reserved = 0;
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.frag.size = rawsize;
+		raw.frag.data = cpump->save;
+		perf_sample_save_raw_data(&data, event, &raw);
+	}
+
+	overflow = perf_event_overflow(event, &data, &regs);
+	perf_event_update_userpage(event);
+	/* Save crypto counter lowcore page after reading event data. */
+	memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
+	return overflow;
+}
+
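Illustration, not part of the patch: a consumer that has extracted the
PERF_SAMPLE_RAW bytes of one sample sees rawsize / 10 packed records as
produced by pai_copy(). A hypothetical user-space decoder (names are
illustrative):

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	/* Mirrors struct pai_userdata: 2 byte counter number, 8 byte delta */
	struct pai_record {
		uint16_t num;
		uint64_t value;
	} __attribute__((packed));

	static void decode_raw(const unsigned char *buf, size_t rawsize)
	{
		struct pai_record rec;
		size_t off;

		for (off = 0; off + sizeof(rec) <= rawsize; off += sizeof(rec)) {
			memcpy(&rec, buf + off, sizeof(rec));	/* unaligned safe */
			printf("counter %u delta %llu\n", rec.num,
			       (unsigned long long)rec.value);
		}
	}
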
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void pai_have_sample(struct perf_event *event, struct pai_map *cpump)
+{
+	struct pai_pmu *pp;
+	size_t rawsize;
+
+	if (!event)		/* No event active */
+		return;
+	pp = &pai_pmu[PAI_PMU_IDX(event)];
+	rawsize = pai_copy(cpump->save, cpump->area, pp,
+			   (unsigned long *)PAI_SAVE_AREA(event),
+			   event->attr.exclude_user,
+			   event->attr.exclude_kernel);
+	if (rawsize)		/* Incremented counters */
+		pai_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void pai_have_samples(int idx)
+{
+	struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+	struct pai_map *cpump = mp->mapptr;
+	struct perf_event *event;
+
+	list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+		pai_have_sample(event, cpump);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
+				struct task_struct *task, bool sched_in)
+{
+	/* We started with a clean page on event installation. So read out
+	 * results on schedule_out and if page was dirty, save old values.
+	 */
+	if (!sched_in)
+		pai_have_samples(PAI_PMU_CRYPTO);
+}
+
+/* ============================= paiext ====================================*/
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+	pai_event_destroy(event);
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+	int rc = pai_event_init(event, PAI_PMU_EXT);
+
+	if (!rc) {
+		event->attr.exclude_kernel = true;	/* No kernel space part */
+		event->destroy = paiext_event_destroy;
+		/* Offset of NNPA in paiext_cb */
+		event->hw.config_base = offsetof(struct paiext_cb, acc);
+	}
+	return rc;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+	return pai_getdata(event, false);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+	pai_read(event, paiext_getall);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+	pai_start(event, flags, paiext_getall);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+	return pai_add(event, flags);
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+	pai_stop(event, flags);
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+	pai_del(event, flags);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
+			      struct task_struct *task, bool sched_in)
+{
+	/* We started with a clean page on event installation. So read out
+	 * results on schedule_out and if page was dirty, save old values.
+	 */
+	if (!sched_in)
+		pai_have_samples(PAI_PMU_EXT);
+}
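For illustration only, not part of the commit: a user-space sketch of counting a single PAI crypto event through perf_event_open(). The PMU type is read from sysfs because it is assigned dynamically at registration; the config value assumes the 0x1000 counter base documented below plus counter number 7 (KM_AES_128 in the counter list). Error handling is minimal.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	FILE *f;
	int fd;

	/* The dynamic PMU type is exported at registration time. */
	f = fopen("/sys/bus/event_source/devices/pai_crypto/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%u", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x1000 + 7;	/* KM_AES_128, see counter list below */

	/* Count for the calling thread on any CPU. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;
	/* ... trigger in-kernel AES-128 cipher usage here ... */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("KM_AES_128: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}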
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 + offset in mapped kernel page.
+ * All CPU Measurement Facility counter identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+	.name = "events",
+	.attrs = NULL			/* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+	.name = "format",
+	.attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+	&paicrypt_events_group,
+	&paicrypt_format_group,
+	NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+	.task_ctx_nr = perf_hw_context,
+	.event_init = paicrypt_event_init,
+	.add = paicrypt_add,
+	.del = paicrypt_del,
+	.start = paicrypt_start,
+	.stop = paicrypt_stop,
+	.read = paicrypt_read,
+	.sched_task = paicrypt_sched_task,
+	.attr_groups = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+	[0] = "CRYPTO_ALL",
+	[1] = "KM_DEA",
+	[2] = "KM_TDEA_128",
+	[3] = "KM_TDEA_192",
+	[4] = "KM_ENCRYPTED_DEA",
+	[5] = "KM_ENCRYPTED_TDEA_128",
+	[6] = "KM_ENCRYPTED_TDEA_192",
+	[7] = "KM_AES_128",
+	[8] = "KM_AES_192",
+	[9] = "KM_AES_256",
+	[10] = "KM_ENCRYPTED_AES_128",
+	[11] = "KM_ENCRYPTED_AES_192",
+	[12] = "KM_ENCRYPTED_AES_256",
+	[13] = "KM_XTS_AES_128",
+	[14] = "KM_XTS_AES_256",
+	[15] = "KM_XTS_ENCRYPTED_AES_128",
+	[16] = "KM_XTS_ENCRYPTED_AES_256",
+	[17] = "KMC_DEA",
+	[18] = "KMC_TDEA_128",
+	[19] = "KMC_TDEA_192",
+	[20] = "KMC_ENCRYPTED_DEA",
+	[21] = "KMC_ENCRYPTED_TDEA_128",
+	[22] = "KMC_ENCRYPTED_TDEA_192",
+	[23] = "KMC_AES_128",
+	[24] = "KMC_AES_192",
+	[25] = "KMC_AES_256",
+	[26] = "KMC_ENCRYPTED_AES_128",
+	[27] = "KMC_ENCRYPTED_AES_192",
+	[28] = "KMC_ENCRYPTED_AES_256",
+	[29] = "KMC_PRNG",
+	[30] = "KMA_GCM_AES_128",
+	[31] = "KMA_GCM_AES_192",
+	[32] = "KMA_GCM_AES_256",
+	[33] = "KMA_GCM_ENCRYPTED_AES_128",
+	[34] = "KMA_GCM_ENCRYPTED_AES_192",
+	[35] = "KMA_GCM_ENCRYPTED_AES_256",
+	[36] = "KMF_DEA",
+	[37] = "KMF_TDEA_128",
+	[38] = "KMF_TDEA_192",
+	[39] = "KMF_ENCRYPTED_DEA",
+	[40] = "KMF_ENCRYPTED_TDEA_128",
+	[41] = "KMF_ENCRYPTED_TDEA_192",
+	[42] = "KMF_AES_128",
+	[43] = "KMF_AES_192",
+	[44] = "KMF_AES_256",
+	[45] = "KMF_ENCRYPTED_AES_128",
+	[46] = "KMF_ENCRYPTED_AES_192",
+	[47] = "KMF_ENCRYPTED_AES_256",
+	[48] = "KMCTR_DEA",
+	[49] = "KMCTR_TDEA_128",
+	[50] = "KMCTR_TDEA_192",
+	[51] = "KMCTR_ENCRYPTED_DEA",
+	[52] = "KMCTR_ENCRYPTED_TDEA_128",
+	[53] = "KMCTR_ENCRYPTED_TDEA_192",
+	[54] = "KMCTR_AES_128",
+	[55] = "KMCTR_AES_192",
+	[56] = "KMCTR_AES_256",
+	[57] = "KMCTR_ENCRYPTED_AES_128",
+	[58] = "KMCTR_ENCRYPTED_AES_192",
+	[59] = "KMCTR_ENCRYPTED_AES_256",
+	[60] = "KMO_DEA",
+	[61] = "KMO_TDEA_128",
+	[62] = "KMO_TDEA_192",
+	[63] = "KMO_ENCRYPTED_DEA",
+	[64] = "KMO_ENCRYPTED_TDEA_128",
+	[65] = "KMO_ENCRYPTED_TDEA_192",
+	[66] = "KMO_AES_128",
+	[67] = "KMO_AES_192",
+	[68] = "KMO_AES_256",
+	[69] = "KMO_ENCRYPTED_AES_128",
+	[70] = "KMO_ENCRYPTED_AES_192",
+	[71] = "KMO_ENCRYPTED_AES_256",
+	[72] = "KIMD_SHA_1",
+	[73] = "KIMD_SHA_256",
+	[74] = "KIMD_SHA_512",
+	[75] = "KIMD_SHA3_224",
+	[76] = "KIMD_SHA3_256",
+	[77] = "KIMD_SHA3_384",
+
[78] = "KIMD_SHA3_512", + [79] = "KIMD_SHAKE_128", + [80] = "KIMD_SHAKE_256", + [81] = "KIMD_GHASH", + [82] = "KLMD_SHA_1", + [83] = "KLMD_SHA_256", + [84] = "KLMD_SHA_512", + [85] = "KLMD_SHA3_224", + [86] = "KLMD_SHA3_256", + [87] = "KLMD_SHA3_384", + [88] = "KLMD_SHA3_512", + [89] = "KLMD_SHAKE_128", + [90] = "KLMD_SHAKE_256", + [91] = "KMAC_DEA", + [92] = "KMAC_TDEA_128", + [93] = "KMAC_TDEA_192", + [94] = "KMAC_ENCRYPTED_DEA", + [95] = "KMAC_ENCRYPTED_TDEA_128", + [96] = "KMAC_ENCRYPTED_TDEA_192", + [97] = "KMAC_AES_128", + [98] = "KMAC_AES_192", + [99] = "KMAC_AES_256", + [100] = "KMAC_ENCRYPTED_AES_128", + [101] = "KMAC_ENCRYPTED_AES_192", + [102] = "KMAC_ENCRYPTED_AES_256", + [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", + [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", + [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", + [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", + [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", + [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", + [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", + [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", + [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", + [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", + [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", + [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", + [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", + [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", + [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", + [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", + [119] = "PCC_SCALAR_MULTIPLY_P256", + [120] = "PCC_SCALAR_MULTIPLY_P384", + [121] = "PCC_SCALAR_MULTIPLY_P521", + [122] = "PCC_SCALAR_MULTIPLY_ED25519", + [123] = "PCC_SCALAR_MULTIPLY_ED448", + [124] = "PCC_SCALAR_MULTIPLY_X25519", + [125] = "PCC_SCALAR_MULTIPLY_X448", + [126] = "PRNO_SHA_512_DRNG", + [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", + [128] = "PRNO_TRNG", + [129] = "KDSA_ECDSA_VERIFY_P256", + [130] = "KDSA_ECDSA_VERIFY_P384", + [131] = "KDSA_ECDSA_VERIFY_P521", + [132] = "KDSA_ECDSA_SIGN_P256", + [133] = "KDSA_ECDSA_SIGN_P384", + [134] = "KDSA_ECDSA_SIGN_P521", + [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256", + [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384", + [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521", + [138] = "KDSA_EDDSA_VERIFY_ED25519", + [139] = "KDSA_EDDSA_VERIFY_ED448", + [140] = "KDSA_EDDSA_SIGN_ED25519", + [141] = "KDSA_EDDSA_SIGN_ED448", + [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", + [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", + [144] = "PCKMO_ENCRYPT_DEA_KEY", + [145] = "PCKMO_ENCRYPT_TDEA_128_KEY", + [146] = "PCKMO_ENCRYPT_TDEA_192_KEY", + [147] = "PCKMO_ENCRYPT_AES_128_KEY", + [148] = "PCKMO_ENCRYPT_AES_192_KEY", + [149] = "PCKMO_ENCRYPT_AES_256_KEY", + [150] = "PCKMO_ENCRYPT_ECC_P256_KEY", + [151] = "PCKMO_ENCRYPT_ECC_P384_KEY", + [152] = "PCKMO_ENCRYPT_ECC_P521_KEY", + [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY", + [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY", + [155] = "IBM_RESERVED_155", + [156] = "IBM_RESERVED_156", + [157] = "KM_FULL_XTS_AES_128", + [158] = "KM_FULL_XTS_AES_256", + [159] = "KM_FULL_XTS_ENCRYPTED_AES_128", + [160] = "KM_FULL_XTS_ENCRYPTED_AES_256", + [161] = "KMAC_HMAC_SHA_224", + [162] = "KMAC_HMAC_SHA_256", + [163] = "KMAC_HMAC_SHA_384", + [164] = "KMAC_HMAC_SHA_512", + [165] = "KMAC_HMAC_ENCRYPTED_SHA_224", + [166] = "KMAC_HMAC_ENCRYPTED_SHA_256", + [167] = "KMAC_HMAC_ENCRYPTED_SHA_384", + [168] = "KMAC_HMAC_ENCRYPTED_SHA_512", + [169] = 
"PCKMO_ENCRYPT_HMAC_512_KEY", + [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY", + [171] = "PCKMO_ENCRYPT_AES_XTS_128", + [172] = "PCKMO_ENCRYPT_AES_XTS_256", +}; + +static struct attribute *paiext_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paiext_events_group = { + .name = "events", + .attrs = NULL, /* Filled in attr_event_init() */ +}; + +static struct attribute_group paiext_format_group = { + .name = "format", + .attrs = paiext_format_attr, +}; + +static const struct attribute_group *paiext_attr_groups[] = { + &paiext_events_group, + &paiext_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paiext = { + .task_ctx_nr = perf_hw_context, + .event_init = paiext_event_init, + .add = paiext_add, + .del = paiext_del, + .start = paiext_start, + .stop = paiext_stop, + .read = paiext_read, + .sched_task = paiext_sched_task, + .attr_groups = paiext_attr_groups, +}; + +/* List of symbolic PAI extension 1 NNPA counter names. */ +static const char * const paiext_ctrnames[] = { + [0] = "NNPA_ALL", + [1] = "NNPA_ADD", + [2] = "NNPA_SUB", + [3] = "NNPA_MUL", + [4] = "NNPA_DIV", + [5] = "NNPA_MIN", + [6] = "NNPA_MAX", + [7] = "NNPA_LOG", + [8] = "NNPA_EXP", + [9] = "NNPA_IBM_RESERVED_9", + [10] = "NNPA_RELU", + [11] = "NNPA_TANH", + [12] = "NNPA_SIGMOID", + [13] = "NNPA_SOFTMAX", + [14] = "NNPA_BATCHNORM", + [15] = "NNPA_MAXPOOL2D", + [16] = "NNPA_AVGPOOL2D", + [17] = "NNPA_LSTMACT", + [18] = "NNPA_GRUACT", + [19] = "NNPA_CONVOLUTION", + [20] = "NNPA_MATMUL_OP", + [21] = "NNPA_MATMUL_OP_BCAST23", + [22] = "NNPA_SMALLBATCH", + [23] = "NNPA_LARGEDIM", + [24] = "NNPA_SMALLTENSOR", + [25] = "NNPA_1MFRAME", + [26] = "NNPA_2GFRAME", + [27] = "NNPA_ACCESSEXCEPT", + [28] = "NNPA_TRANSFORM", + [29] = "NNPA_GELU", + [30] = "NNPA_MOMENTS", + [31] = "NNPA_LAYERNORM", + [32] = "NNPA_MATMUL_OP_BCAST1", + [33] = "NNPA_SQRT", + [34] = "NNPA_INVSQRT", + [35] = "NNPA_NORM", + [36] = "NNPA_REDUCE", +}; + +static void __init attr_event_free(struct attribute **attrs) +{ + struct perf_pmu_events_attr *pa; + unsigned int i; + + for (i = 0; attrs[i]; i++) { + struct device_attribute *dap; + + dap = container_of(attrs[i], struct device_attribute, attr); + pa = container_of(dap, struct perf_pmu_events_attr, attr); + kfree(pa); + } + kfree(attrs); +} + +static struct attribute * __init attr_event_init_one(int num, + unsigned long base, + const char *name) +{ + struct perf_pmu_events_attr *pa; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return NULL; + + sysfs_attr_init(&pa->attr.attr); + pa->id = base + num; + pa->attr.attr.name = name; + pa->attr.attr.mode = 0444; + pa->attr.show = cpumf_events_sysfs_show; + pa->attr.store = NULL; + return &pa->attr.attr; +} + +static struct attribute ** __init attr_event_init(struct pai_pmu *p) +{ + unsigned int min_attr = min_t(unsigned int, p->num_named, p->num_avail); + struct attribute **attrs; + unsigned int i; + + attrs = kmalloc_array(min_attr + 1, sizeof(*attrs), GFP_KERNEL | __GFP_ZERO); + if (!attrs) + goto out; + for (i = 0; i < min_attr; i++) { + attrs[i] = attr_event_init_one(i, p->base, p->names[i]); + if (!attrs[i]) { + attr_event_free(attrs); + attrs = NULL; + goto out; + } + } + attrs[i] = NULL; +out: + return attrs; +} + +static void __init pai_pmu_exit(struct pai_pmu *p) +{ + attr_event_free(p->event_group->attrs); + p->event_group->attrs = NULL; +} + +/* Add a PMU. Install its events and register the PMU device driver + * call back functions. 
+
+/* Add a PMU. Install its events and register the PMU device driver
+ * callback functions.
+ */
+static int __init pai_pmu_init(struct pai_pmu *p)
+{
+	int rc = -ENOMEM;
+
+	/* Export known PAI events */
+	p->event_group->attrs = attr_event_init(p);
+	if (!p->event_group->attrs) {
+		pr_err("Creation of PMU %s /sysfs failed\n", p->pmuname);
+		goto out;
+	}
+
+	rc = perf_pmu_register(p->pmu, p->pmuname, -1);
+	if (rc) {
+		pai_pmu_exit(p);
+		pr_err("Registering PMU %s failed with rc=%i\n", p->pmuname,
+		       rc);
+	}
+out:
+	return rc;
+}
+
+/* PAI PMU characteristics table */
+static struct pai_pmu pai_pmu[] __refdata = {
+	[PAI_PMU_CRYPTO] = {
+		.pmuname = "pai_crypto",
+		.facility_nr = 196,
+		.num_named = ARRAY_SIZE(paicrypt_ctrnames),
+		.names = paicrypt_ctrnames,
+		.base = PAI_CRYPTO_BASE,
+		.kernel_offset = PAI_CRYPTO_KERNEL_OFFSET,
+		.area_size = PAGE_SIZE,
+		.init = pai_pmu_init,
+		.exit = pai_pmu_exit,
+		.pmu = &paicrypt,
+		.event_group = &paicrypt_events_group
+	},
+	[PAI_PMU_EXT] = {
+		.pmuname = "pai_ext",
+		.facility_nr = 197,
+		.num_named = ARRAY_SIZE(paiext_ctrnames),
+		.names = paiext_ctrnames,
+		.base = PAI_NNPA_BASE,
+		.kernel_offset = 0,
+		.area_size = PAIE1_CTRBLOCK_SZ,
+		.init = pai_pmu_init,
+		.exit = pai_pmu_exit,
+		.pmu = &paiext,
+		.event_group = &paiext_events_group
+	}
+};
+
+/*
+ * Check if the PMU (via facility) is supported by the machine. Try all of the
+ * supported PAI PMUs.
+ * Return number of successfully installed PMUs.
+ */
+static int __init paipmu_setup(void)
+{
+	struct qpaci_info_block ib;
+	int install_ok = 0, rc;
+	struct pai_pmu *p;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(pai_pmu); ++i) {
+		p = &pai_pmu[i];
+
+		if (!test_facility(p->facility_nr))
+			continue;
+
+		qpaci(&ib);
+		switch (i) {
+		case PAI_PMU_CRYPTO:
+			p->num_avail = ib.num_cc;
+			if (p->num_avail >= PAI_CRYPTO_MAXCTR) {
+				pr_err("Too many PMU %s counters %d\n",
+				       p->pmuname, p->num_avail);
+				continue;
+			}
+			break;
+		case PAI_PMU_EXT:
+			p->num_avail = ib.num_nnpa;
+			break;
+		}
+		p->num_avail += 1;	/* Add xxx_ALL event */
+		if (p->init) {
+			rc = p->init(p);
+			if (!rc)
+				++install_ok;
+		}
+	}
+	return install_ok;
+}
+
+static int __init pai_init(void)
+{
+	/* Setup s390dbf facility */
+	paidbg = debug_register("pai", 32, 256, 128);
+	if (!paidbg) {
+		pr_err("Registration of s390dbf pai failed\n");
+		return -ENOMEM;
+	}
+	debug_register_view(paidbg, &debug_sprintf_view);
+
+	if (!paipmu_setup()) {
+		/* No PMU registration, no need for debug buffer */
+		debug_unregister_view(paidbg, &debug_sprintf_view);
+		debug_unregister(paidbg);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+device_initcall(pai_init);
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
deleted file mode 100644
index fa7325454266..000000000000
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ /dev/null
@@ -1,861 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Performance event support - Processor Activity Instrumentation Facility
- *
- * Copyright IBM Corp.
2022 - * Author(s): Thomas Richter <tmricht@linux.ibm.com> - */ -#define KMSG_COMPONENT "pai_crypto" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/percpu.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/io.h> -#include <linux/perf_event.h> -#include <asm/ctlreg.h> -#include <asm/pai.h> -#include <asm/debug.h> - -static debug_info_t *cfm_dbg; -static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */ - /* extracted with QPACI instruction */ - -DEFINE_STATIC_KEY_FALSE(pai_key); - -struct pai_userdata { - u16 num; - u64 value; -} __packed; - -struct paicrypt_map { - unsigned long *page; /* Page for CPU to store counters */ - struct pai_userdata *save; /* Page to store no-zero counters */ - unsigned int active_events; /* # of PAI crypto users */ - refcount_t refcnt; /* Reference count mapped buffers */ - struct perf_event *event; /* Perf event for sampling */ - struct list_head syswide_list; /* List system-wide sampling events */ -}; - -struct paicrypt_mapptr { - struct paicrypt_map *mapptr; -}; - -static struct paicrypt_root { /* Anchor to per CPU data */ - refcount_t refcnt; /* Overall active events */ - struct paicrypt_mapptr __percpu *mapptr; -} paicrypt_root; - -/* Free per CPU data when the last event is removed. */ -static void paicrypt_root_free(void) -{ - if (refcount_dec_and_test(&paicrypt_root.refcnt)) { - free_percpu(paicrypt_root.mapptr); - paicrypt_root.mapptr = NULL; - } - debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__, - refcount_read(&paicrypt_root.refcnt)); -} - -/* - * On initialization of first event also allocate per CPU data dynamically. - * Start with an array of pointers, the array size is the maximum number of - * CPUs possible, which might be larger than the number of CPUs currently - * online. - */ -static int paicrypt_root_alloc(void) -{ - if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) { - /* The memory is already zeroed. */ - paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr); - if (!paicrypt_root.mapptr) - return -ENOMEM; - refcount_set(&paicrypt_root.refcnt, 1); - } - return 0; -} - -/* Release the PMU if event is the last perf event */ -static DEFINE_MUTEX(pai_reserve_mutex); - -/* Adjust usage counters and remove allocated memory when all users are - * gone. 
- */ -static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu) -{ - struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); - struct paicrypt_map *cpump = mp->mapptr; - - mutex_lock(&pai_reserve_mutex); - debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d " - "refcnt %u\n", __func__, event->attr.config, - event->cpu, cpump->active_events, - refcount_read(&cpump->refcnt)); - if (refcount_dec_and_test(&cpump->refcnt)) { - debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", - __func__, (unsigned long)cpump->page, - cpump->save); - free_page((unsigned long)cpump->page); - kvfree(cpump->save); - kfree(cpump); - mp->mapptr = NULL; - } - paicrypt_root_free(); - mutex_unlock(&pai_reserve_mutex); -} - -static void paicrypt_event_destroy(struct perf_event *event) -{ - int cpu; - - static_branch_dec(&pai_key); - free_page(PAI_SAVE_AREA(event)); - if (event->cpu == -1) { - struct cpumask *mask = PAI_CPU_MASK(event); - - for_each_cpu(cpu, mask) - paicrypt_event_destroy_cpu(event, cpu); - kfree(mask); - } else { - paicrypt_event_destroy_cpu(event, event->cpu); - } -} - -static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel) -{ - if (kernel) - nr += PAI_CRYPTO_MAXCTR; - return page[nr]; -} - -/* Read the counter values. Return value from location in CMP. For event - * CRYPTO_ALL sum up all events. - */ -static u64 paicrypt_getdata(struct perf_event *event, bool kernel) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - u64 sum = 0; - int i; - - if (event->attr.config != PAI_CRYPTO_BASE) { - return paicrypt_getctr(cpump->page, - event->attr.config - PAI_CRYPTO_BASE, - kernel); - } - - for (i = 1; i <= paicrypt_cnt; i++) { - u64 val = paicrypt_getctr(cpump->page, i, kernel); - - if (!val) - continue; - sum += val; - } - return sum; -} - -static u64 paicrypt_getall(struct perf_event *event) -{ - u64 sum = 0; - - if (!event->attr.exclude_kernel) - sum += paicrypt_getdata(event, true); - if (!event->attr.exclude_user) - sum += paicrypt_getdata(event, false); - - return sum; -} - -/* Check concurrent access of counting and sampling for crypto events. - * This function is called in process context and it is save to block. - * When the event initialization functions fails, no other call back will - * be invoked. - * - * Allocate the memory for the event. - */ -static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) -{ - struct paicrypt_map *cpump = NULL; - struct paicrypt_mapptr *mp; - int rc; - - mutex_lock(&pai_reserve_mutex); - - /* Allocate root node */ - rc = paicrypt_root_alloc(); - if (rc) - goto unlock; - - /* Allocate node for this event */ - mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); - cpump = mp->mapptr; - if (!cpump) { /* Paicrypt_map allocated? */ - cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); - if (!cpump) { - rc = -ENOMEM; - goto free_root; - } - INIT_LIST_HEAD(&cpump->syswide_list); - } - - /* Allocate memory for counter page and counter extraction. - * Only the first counting event has to allocate a page. 
- */ - if (cpump->page) { - refcount_inc(&cpump->refcnt); - goto unlock; - } - - rc = -ENOMEM; - cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); - if (!cpump->page) - goto free_paicrypt_map; - cpump->save = kvmalloc_array(paicrypt_cnt + 1, - sizeof(struct pai_userdata), GFP_KERNEL); - if (!cpump->save) { - free_page((unsigned long)cpump->page); - cpump->page = NULL; - goto free_paicrypt_map; - } - - /* Set mode and reference count */ - rc = 0; - refcount_set(&cpump->refcnt, 1); - mp->mapptr = cpump; - debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx " - "save %p rc %d\n", __func__, cpump->active_events, - refcount_read(&cpump->refcnt), - (unsigned long)cpump->page, cpump->save, rc); - goto unlock; - -free_paicrypt_map: - /* Undo memory allocation */ - kfree(cpump); - mp->mapptr = NULL; -free_root: - paicrypt_root_free(); -unlock: - mutex_unlock(&pai_reserve_mutex); - return rc ? ERR_PTR(rc) : cpump; -} - -static int paicrypt_event_init_all(struct perf_event *event) -{ - struct paicrypt_map *cpump; - struct cpumask *maskptr; - int cpu, rc = -ENOMEM; - - maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); - if (!maskptr) - goto out; - - for_each_online_cpu(cpu) { - cpump = paicrypt_busy(event, cpu); - if (IS_ERR(cpump)) { - for_each_cpu(cpu, maskptr) - paicrypt_event_destroy_cpu(event, cpu); - kfree(maskptr); - rc = PTR_ERR(cpump); - goto out; - } - cpumask_set_cpu(cpu, maskptr); - } - - /* - * On error all cpumask are freed and all events have been destroyed. - * Save of which CPUs data structures have been allocated for. - * Release them in paicrypt_event_destroy call back function - * for this event. - */ - PAI_CPU_MASK(event) = maskptr; - rc = 0; -out: - return rc; -} - -/* Might be called on different CPU than the one the event is intended for. */ -static int paicrypt_event_init(struct perf_event *event) -{ - struct perf_event_attr *a = &event->attr; - struct paicrypt_map *cpump; - int rc = 0; - - /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ - if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) - return -ENOENT; - /* PAI crypto event must be in valid range */ - if (a->config < PAI_CRYPTO_BASE || - a->config > PAI_CRYPTO_BASE + paicrypt_cnt) - return -EINVAL; - /* Allow only CRYPTO_ALL for sampling */ - if (a->sample_period && a->config != PAI_CRYPTO_BASE) - return -EINVAL; - /* Get a page to store last counter values for sampling */ - if (a->sample_period) { - PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); - if (!PAI_SAVE_AREA(event)) { - rc = -ENOMEM; - goto out; - } - } - - if (event->cpu >= 0) { - cpump = paicrypt_busy(event, event->cpu); - if (IS_ERR(cpump)) - rc = PTR_ERR(cpump); - } else { - rc = paicrypt_event_init_all(event); - } - if (rc) { - free_page(PAI_SAVE_AREA(event)); - goto out; - } - event->destroy = paicrypt_event_destroy; - - if (a->sample_period) { - a->sample_period = 1; - a->freq = 0; - /* Register for paicrypt_sched_task() to be called */ - event->attach_state |= PERF_ATTACH_SCHED_CB; - /* Add raw data which contain the memory mapped counters */ - a->sample_type |= PERF_SAMPLE_RAW; - /* Turn off inheritance */ - a->inherit = 0; - } - - static_branch_inc(&pai_key); -out: - return rc; -} - -static void paicrypt_read(struct perf_event *event) -{ - u64 prev, new, delta; - - prev = local64_read(&event->hw.prev_count); - new = paicrypt_getall(event); - local64_set(&event->hw.prev_count, new); - delta = (prev <= new) ? 
new - prev - : (-1ULL - prev) + new + 1; /* overflow */ - local64_add(delta, &event->count); -} - -static void paicrypt_start(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - u64 sum; - - if (!event->attr.sample_period) { /* Counting */ - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - } else { /* Sampling */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE); - /* Enable context switch callback for system-wide sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); - perf_sched_cb_inc(event->pmu); - } else { - cpump->event = event; - } - } -} - -static int paicrypt_add(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - unsigned long ccd; - - if (++cpump->active_events == 1) { - ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET; - WRITE_ONCE(get_lowcore()->ccd, ccd); - local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); - } - if (flags & PERF_EF_START) - paicrypt_start(event, PERF_EF_RELOAD); - event->hw.state = 0; - return 0; -} - -static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *); -static void paicrypt_stop(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - - if (!event->attr.sample_period) { /* Counting */ - paicrypt_read(event); - } else { /* Sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - perf_sched_cb_dec(event->pmu); - list_del(PAI_SWLIST(event)); - } else { - paicrypt_have_sample(event, cpump); - cpump->event = NULL; - } - } - event->hw.state = PERF_HES_STOPPED; -} - -static void paicrypt_del(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - - paicrypt_stop(event, PERF_EF_UPDATE); - if (--cpump->active_events == 0) { - local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); - WRITE_ONCE(get_lowcore()->ccd, 0); - } -} - -/* Create raw data and save it in buffer. Calculate the delta for each - * counter between this invocation and the last invocation. - * Returns number of bytes copied. 
- * Saves only entries with positive counter difference of the form
- * 2 bytes: Number of counter
- * 8 bytes: Value of counter
- */
-static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
-			    unsigned long *page_old, bool exclude_user,
-			    bool exclude_kernel)
-{
-	int i, outidx = 0;
-
-	for (i = 1; i <= paicrypt_cnt; i++) {
-		u64 val = 0, val_old = 0;
-
-		if (!exclude_kernel) {
-			val += paicrypt_getctr(page, i, true);
-			val_old += paicrypt_getctr(page_old, i, true);
-		}
-		if (!exclude_user) {
-			val += paicrypt_getctr(page, i, false);
-			val_old += paicrypt_getctr(page_old, i, false);
-		}
-		if (val >= val_old)
-			val -= val_old;
-		else
-			val = (~0ULL - val_old) + val + 1;
-		if (val) {
-			userdata[outidx].num = i;
-			userdata[outidx].value = val;
-			outidx++;
-		}
-	}
-	return outidx * sizeof(struct pai_userdata);
-}
-
-static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
-				struct perf_event *event)
-{
-	struct perf_sample_data data;
-	struct perf_raw_record raw;
-	struct pt_regs regs;
-	int overflow;
-
-	/* Setup perf sample */
-	memset(&regs, 0, sizeof(regs));
-	memset(&raw, 0, sizeof(raw));
-	memset(&data, 0, sizeof(data));
-	perf_sample_data_init(&data, 0, event->hw.last_period);
-	if (event->attr.sample_type & PERF_SAMPLE_TID) {
-		data.tid_entry.pid = task_tgid_nr(current);
-		data.tid_entry.tid = task_pid_nr(current);
-	}
-	if (event->attr.sample_type & PERF_SAMPLE_TIME)
-		data.time = event->clock();
-	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
-		data.id = event->id;
-	if (event->attr.sample_type & PERF_SAMPLE_CPU) {
-		data.cpu_entry.cpu = smp_processor_id();
-		data.cpu_entry.reserved = 0;
-	}
-	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-		raw.frag.size = rawsize;
-		raw.frag.data = cpump->save;
-		perf_sample_save_raw_data(&data, &raw);
-	}
-
-	overflow = perf_event_overflow(event, &data, &regs);
-	perf_event_update_userpage(event);
-	/* Save crypto counter lowcore page after reading event data. */
-	memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
-	return overflow;
-}
-
-/* Check if there is data to be saved on schedule out of a task. */
-static void paicrypt_have_sample(struct perf_event *event,
-				 struct paicrypt_map *cpump)
-{
-	size_t rawsize;
-
-	if (!event)		/* No event active */
-		return;
-	rawsize = paicrypt_copy(cpump->save, cpump->page,
-				(unsigned long *)PAI_SAVE_AREA(event),
-				event->attr.exclude_user,
-				event->attr.exclude_kernel);
-	if (rawsize)		/* No incremented counters */
-		paicrypt_push_sample(rawsize, cpump, event);
-}
-
-/* Check if there is data to be saved on schedule out of a task. */
-static void paicrypt_have_samples(void)
-{
-	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
-	struct paicrypt_map *cpump = mp->mapptr;
-	struct perf_event *event;
-
-	list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
-		paicrypt_have_sample(event, cpump);
-}
-
-/* Called on schedule-in and schedule-out. No access to event structure,
- * but for sampling only event CRYPTO_ALL is allowed.
- */
-static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
-{
-	/* We started with a clean page on event installation. So read out
-	 * results on schedule_out and if page was dirty, save old values.
-	 */
-	if (!sched_in)
-		paicrypt_have_samples();
-}
-
-/* Attribute definitions for paicrypt interface. As with other CPU
- * Measurement Facilities, there is one attribute per mapped counter.
- * The number of mapped counters may vary per machine generation.
Use - * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction - * to determine the number of mapped counters. The instructions returns - * a positive number, which is the highest number of supported counters. - * All counters less than this number are also supported, there are no - * holes. A returned number of zero means no support for mapped counters. - * - * The identification of the counter is a unique number. The chosen range - * is 0x1000 + offset in mapped kernel page. - * All CPU Measurement Facility counters identifiers must be unique and - * the numbers from 0 to 496 are already used for the CPU Measurement - * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already - * used for the CPU Measurement Sampling facility. - */ -PMU_FORMAT_ATTR(event, "config:0-63"); - -static struct attribute *paicrypt_format_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group paicrypt_events_group = { - .name = "events", - .attrs = NULL /* Filled in attr_event_init() */ -}; - -static struct attribute_group paicrypt_format_group = { - .name = "format", - .attrs = paicrypt_format_attr, -}; - -static const struct attribute_group *paicrypt_attr_groups[] = { - &paicrypt_events_group, - &paicrypt_format_group, - NULL, -}; - -/* Performance monitoring unit for mapped counters */ -static struct pmu paicrypt = { - .task_ctx_nr = perf_hw_context, - .event_init = paicrypt_event_init, - .add = paicrypt_add, - .del = paicrypt_del, - .start = paicrypt_start, - .stop = paicrypt_stop, - .read = paicrypt_read, - .sched_task = paicrypt_sched_task, - .attr_groups = paicrypt_attr_groups -}; - -/* List of symbolic PAI counter names. */ -static const char * const paicrypt_ctrnames[] = { - [0] = "CRYPTO_ALL", - [1] = "KM_DEA", - [2] = "KM_TDEA_128", - [3] = "KM_TDEA_192", - [4] = "KM_ENCRYPTED_DEA", - [5] = "KM_ENCRYPTED_TDEA_128", - [6] = "KM_ENCRYPTED_TDEA_192", - [7] = "KM_AES_128", - [8] = "KM_AES_192", - [9] = "KM_AES_256", - [10] = "KM_ENCRYPTED_AES_128", - [11] = "KM_ENCRYPTED_AES_192", - [12] = "KM_ENCRYPTED_AES_256", - [13] = "KM_XTS_AES_128", - [14] = "KM_XTS_AES_256", - [15] = "KM_XTS_ENCRYPTED_AES_128", - [16] = "KM_XTS_ENCRYPTED_AES_256", - [17] = "KMC_DEA", - [18] = "KMC_TDEA_128", - [19] = "KMC_TDEA_192", - [20] = "KMC_ENCRYPTED_DEA", - [21] = "KMC_ENCRYPTED_TDEA_128", - [22] = "KMC_ENCRYPTED_TDEA_192", - [23] = "KMC_AES_128", - [24] = "KMC_AES_192", - [25] = "KMC_AES_256", - [26] = "KMC_ENCRYPTED_AES_128", - [27] = "KMC_ENCRYPTED_AES_192", - [28] = "KMC_ENCRYPTED_AES_256", - [29] = "KMC_PRNG", - [30] = "KMA_GCM_AES_128", - [31] = "KMA_GCM_AES_192", - [32] = "KMA_GCM_AES_256", - [33] = "KMA_GCM_ENCRYPTED_AES_128", - [34] = "KMA_GCM_ENCRYPTED_AES_192", - [35] = "KMA_GCM_ENCRYPTED_AES_256", - [36] = "KMF_DEA", - [37] = "KMF_TDEA_128", - [38] = "KMF_TDEA_192", - [39] = "KMF_ENCRYPTED_DEA", - [40] = "KMF_ENCRYPTED_TDEA_128", - [41] = "KMF_ENCRYPTED_TDEA_192", - [42] = "KMF_AES_128", - [43] = "KMF_AES_192", - [44] = "KMF_AES_256", - [45] = "KMF_ENCRYPTED_AES_128", - [46] = "KMF_ENCRYPTED_AES_192", - [47] = "KMF_ENCRYPTED_AES_256", - [48] = "KMCTR_DEA", - [49] = "KMCTR_TDEA_128", - [50] = "KMCTR_TDEA_192", - [51] = "KMCTR_ENCRYPTED_DEA", - [52] = "KMCTR_ENCRYPTED_TDEA_128", - [53] = "KMCTR_ENCRYPTED_TDEA_192", - [54] = "KMCTR_AES_128", - [55] = "KMCTR_AES_192", - [56] = "KMCTR_AES_256", - [57] = "KMCTR_ENCRYPTED_AES_128", - [58] = "KMCTR_ENCRYPTED_AES_192", - [59] = "KMCTR_ENCRYPTED_AES_256", - [60] = "KMO_DEA", - [61] = "KMO_TDEA_128", - [62] = 
"KMO_TDEA_192", - [63] = "KMO_ENCRYPTED_DEA", - [64] = "KMO_ENCRYPTED_TDEA_128", - [65] = "KMO_ENCRYPTED_TDEA_192", - [66] = "KMO_AES_128", - [67] = "KMO_AES_192", - [68] = "KMO_AES_256", - [69] = "KMO_ENCRYPTED_AES_128", - [70] = "KMO_ENCRYPTED_AES_192", - [71] = "KMO_ENCRYPTED_AES_256", - [72] = "KIMD_SHA_1", - [73] = "KIMD_SHA_256", - [74] = "KIMD_SHA_512", - [75] = "KIMD_SHA3_224", - [76] = "KIMD_SHA3_256", - [77] = "KIMD_SHA3_384", - [78] = "KIMD_SHA3_512", - [79] = "KIMD_SHAKE_128", - [80] = "KIMD_SHAKE_256", - [81] = "KIMD_GHASH", - [82] = "KLMD_SHA_1", - [83] = "KLMD_SHA_256", - [84] = "KLMD_SHA_512", - [85] = "KLMD_SHA3_224", - [86] = "KLMD_SHA3_256", - [87] = "KLMD_SHA3_384", - [88] = "KLMD_SHA3_512", - [89] = "KLMD_SHAKE_128", - [90] = "KLMD_SHAKE_256", - [91] = "KMAC_DEA", - [92] = "KMAC_TDEA_128", - [93] = "KMAC_TDEA_192", - [94] = "KMAC_ENCRYPTED_DEA", - [95] = "KMAC_ENCRYPTED_TDEA_128", - [96] = "KMAC_ENCRYPTED_TDEA_192", - [97] = "KMAC_AES_128", - [98] = "KMAC_AES_192", - [99] = "KMAC_AES_256", - [100] = "KMAC_ENCRYPTED_AES_128", - [101] = "KMAC_ENCRYPTED_AES_192", - [102] = "KMAC_ENCRYPTED_AES_256", - [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", - [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", - [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", - [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", - [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", - [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", - [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", - [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", - [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", - [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", - [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", - [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A", - [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", - [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", - [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", - [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", - [119] = "PCC_SCALAR_MULTIPLY_P256", - [120] = "PCC_SCALAR_MULTIPLY_P384", - [121] = "PCC_SCALAR_MULTIPLY_P521", - [122] = "PCC_SCALAR_MULTIPLY_ED25519", - [123] = "PCC_SCALAR_MULTIPLY_ED448", - [124] = "PCC_SCALAR_MULTIPLY_X25519", - [125] = "PCC_SCALAR_MULTIPLY_X448", - [126] = "PRNO_SHA_512_DRNG", - [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", - [128] = "PRNO_TRNG", - [129] = "KDSA_ECDSA_VERIFY_P256", - [130] = "KDSA_ECDSA_VERIFY_P384", - [131] = "KDSA_ECDSA_VERIFY_P521", - [132] = "KDSA_ECDSA_SIGN_P256", - [133] = "KDSA_ECDSA_SIGN_P384", - [134] = "KDSA_ECDSA_SIGN_P521", - [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256", - [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384", - [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521", - [138] = "KDSA_EDDSA_VERIFY_ED25519", - [139] = "KDSA_EDDSA_VERIFY_ED448", - [140] = "KDSA_EDDSA_SIGN_ED25519", - [141] = "KDSA_EDDSA_SIGN_ED448", - [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", - [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", - [144] = "PCKMO_ENCRYPT_DEA_KEY", - [145] = "PCKMO_ENCRYPT_TDEA_128_KEY", - [146] = "PCKMO_ENCRYPT_TDEA_192_KEY", - [147] = "PCKMO_ENCRYPT_AES_128_KEY", - [148] = "PCKMO_ENCRYPT_AES_192_KEY", - [149] = "PCKMO_ENCRYPT_AES_256_KEY", - [150] = "PCKMO_ENCRYPT_ECC_P256_KEY", - [151] = "PCKMO_ENCRYPT_ECC_P384_KEY", - [152] = "PCKMO_ENCRYPT_ECC_P521_KEY", - [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY", - [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY", - [155] = "IBM_RESERVED_155", - [156] = "IBM_RESERVED_156", - [157] = 
"KM_FULL_XTS_AES_128", - [158] = "KM_FULL_XTS_AES_256", - [159] = "KM_FULL_XTS_ENCRYPTED_AES_128", - [160] = "KM_FULL_XTS_ENCRYPTED_AES_256", - [161] = "KMAC_HMAC_SHA_224", - [162] = "KMAC_HMAC_SHA_256", - [163] = "KMAC_HMAC_SHA_384", - [164] = "KMAC_HMAC_SHA_512", - [165] = "KMAC_HMAC_ENCRYPTED_SHA_224", - [166] = "KMAC_HMAC_ENCRYPTED_SHA_256", - [167] = "KMAC_HMAC_ENCRYPTED_SHA_384", - [168] = "KMAC_HMAC_ENCRYPTED_SHA_512", - [169] = "PCKMO_ENCRYPT_HMAC_512_KEY", - [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY", - [171] = "PCKMO_ENCRYPT_AES_XTS_128", - [172] = "PCKMO_ENCRYPT_AES_XTS_256", -}; - -static void __init attr_event_free(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - int i; - - for (i = 0; i < num; i++) { - struct device_attribute *dap; - - dap = container_of(attrs[i], struct device_attribute, attr); - pa = container_of(dap, struct perf_pmu_events_attr, attr); - kfree(pa); - } - kfree(attrs); -} - -static int __init attr_event_init_one(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - - /* Index larger than array_size, no counter name available */ - if (num >= ARRAY_SIZE(paicrypt_ctrnames)) { - attrs[num] = NULL; - return 0; - } - - pa = kzalloc(sizeof(*pa), GFP_KERNEL); - if (!pa) - return -ENOMEM; - - sysfs_attr_init(&pa->attr.attr); - pa->id = PAI_CRYPTO_BASE + num; - pa->attr.attr.name = paicrypt_ctrnames[num]; - pa->attr.attr.mode = 0444; - pa->attr.show = cpumf_events_sysfs_show; - pa->attr.store = NULL; - attrs[num] = &pa->attr.attr; - return 0; -} - -/* Create PMU sysfs event attributes on the fly. */ -static int __init attr_event_init(void) -{ - struct attribute **attrs; - int ret, i; - - attrs = kmalloc_array(paicrypt_cnt + 2, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - for (i = 0; i <= paicrypt_cnt; i++) { - ret = attr_event_init_one(attrs, i); - if (ret) { - attr_event_free(attrs, i); - return ret; - } - } - attrs[i] = NULL; - paicrypt_events_group.attrs = attrs; - return 0; -} - -static int __init paicrypt_init(void) -{ - struct qpaci_info_block ib; - int rc; - - if (!test_facility(196)) - return 0; - - qpaci(&ib); - paicrypt_cnt = ib.num_cc; - if (paicrypt_cnt == 0) - return 0; - if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) { - pr_err("Too many PMU pai_crypto counters %d\n", paicrypt_cnt); - return -E2BIG; - } - - rc = attr_event_init(); /* Export known PAI crypto events */ - if (rc) { - pr_err("Creation of PMU pai_crypto /sysfs failed\n"); - return rc; - } - - /* Setup s390dbf facility */ - cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); - if (!cfm_dbg) { - pr_err("Registration of s390dbf pai_crypto failed\n"); - return -ENOMEM; - } - debug_register_view(cfm_dbg, &debug_sprintf_view); - - rc = perf_pmu_register(&paicrypt, "pai_crypto", -1); - if (rc) { - pr_err("Registering the pai_crypto PMU failed with rc=%i\n", - rc); - debug_unregister_view(cfm_dbg, &debug_sprintf_view); - debug_unregister(cfm_dbg); - return rc; - } - return 0; -} - -device_initcall(paicrypt_init); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c deleted file mode 100644 index 7f462bef1fc0..000000000000 --- a/arch/s390/kernel/perf_pai_ext.c +++ /dev/null @@ -1,756 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Performance event support - Processor Activity Instrumentation Extension - * Facility - * - * Copyright IBM Corp. 
2022 - * Author(s): Thomas Richter <tmricht@linux.ibm.com> - */ -#define KMSG_COMPONENT "pai_ext" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/percpu.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/io.h> -#include <linux/perf_event.h> -#include <asm/ctlreg.h> -#include <asm/pai.h> -#include <asm/debug.h> - -#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ -#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ - -static debug_info_t *paiext_dbg; -static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ - -struct pai_userdata { - u16 num; - u64 value; -} __packed; - -/* Create the PAI extension 1 control block area. - * The PAI extension control block 1 is pointed to by lowcore - * address 0x1508 for each CPU. This control block is 512 bytes in size - * and requires a 512 byte boundary alignment. - */ -struct paiext_cb { /* PAI extension 1 control block */ - u64 header; /* Not used */ - u64 reserved1; - u64 acc; /* Addr to analytics counter control block */ - u8 reserved2[488]; -} __packed; - -struct paiext_map { - unsigned long *area; /* Area for CPU to store counters */ - struct pai_userdata *save; /* Area to store non-zero counters */ - unsigned int active_events; /* # of PAI Extension users */ - refcount_t refcnt; - struct perf_event *event; /* Perf event for sampling */ - struct paiext_cb *paiext_cb; /* PAI extension control block area */ - struct list_head syswide_list; /* List system-wide sampling events */ -}; - -struct paiext_mapptr { - struct paiext_map *mapptr; -}; - -static struct paiext_root { /* Anchor to per CPU data */ - refcount_t refcnt; /* Overall active events */ - struct paiext_mapptr __percpu *mapptr; -} paiext_root; - -/* Free per CPU data when the last event is removed. */ -static void paiext_root_free(void) -{ - if (refcount_dec_and_test(&paiext_root.refcnt)) { - free_percpu(paiext_root.mapptr); - paiext_root.mapptr = NULL; - } - debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__, - refcount_read(&paiext_root.refcnt)); -} - -/* On initialization of first event also allocate per CPU data dynamically. - * Start with an array of pointers, the array size is the maximum number of - * CPUs possible, which might be larger than the number of CPUs currently - * online. - */ -static int paiext_root_alloc(void) -{ - if (!refcount_inc_not_zero(&paiext_root.refcnt)) { - /* The memory is already zeroed. */ - paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); - if (!paiext_root.mapptr) { - /* Returning without refcnt adjustment is ok. The - * error code is handled by paiext_alloc() which - * decrements refcnt when an event can not be - * created. - */ - return -ENOMEM; - } - refcount_set(&paiext_root.refcnt, 1); - } - return 0; -} - -/* Protects against concurrent increment of sampler and counter member - * increments at the same time and prohibits concurrent execution of - * counting and sampling events. - * Ensures that analytics counter block is deallocated only when the - * sampling and counting on that cpu is zero. - * For details see paiext_alloc(). 
- */ -static DEFINE_MUTEX(paiext_reserve_mutex); - -/* Free all memory allocated for event counting/sampling setup */ -static void paiext_free(struct paiext_mapptr *mp) -{ - kfree(mp->mapptr->area); - kfree(mp->mapptr->paiext_cb); - kvfree(mp->mapptr->save); - kfree(mp->mapptr); - mp->mapptr = NULL; -} - -/* Release the PMU if event is the last perf event */ -static void paiext_event_destroy_cpu(struct perf_event *event, int cpu) -{ - struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu); - struct paiext_map *cpump = mp->mapptr; - - mutex_lock(&paiext_reserve_mutex); - if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ - paiext_free(mp); - paiext_root_free(); - mutex_unlock(&paiext_reserve_mutex); -} - -static void paiext_event_destroy(struct perf_event *event) -{ - int cpu; - - free_page(PAI_SAVE_AREA(event)); - if (event->cpu == -1) { - struct cpumask *mask = PAI_CPU_MASK(event); - - for_each_cpu(cpu, mask) - paiext_event_destroy_cpu(event, cpu); - kfree(mask); - } else { - paiext_event_destroy_cpu(event, event->cpu); - } - debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__, - event->cpu); -} - -/* Used to avoid races in checking concurrent access of counting and - * sampling for pai_extension events. - * - * Only one instance of event pai_ext/NNPA_ALL/ for sampling is - * allowed and when this event is running, no counting event is allowed. - * Several counting events are allowed in parallel, but no sampling event - * is allowed while one (or more) counting events are running. - * - * This function is called in process context and it is safe to block. - * When the event initialization functions fails, no other call back will - * be invoked. - * - * Allocate the memory for the event. - */ -static int paiext_alloc_cpu(struct perf_event *event, int cpu) -{ - struct paiext_mapptr *mp; - struct paiext_map *cpump; - int rc; - - mutex_lock(&paiext_reserve_mutex); - rc = paiext_root_alloc(); - if (rc) - goto unlock; - - mp = per_cpu_ptr(paiext_root.mapptr, cpu); - cpump = mp->mapptr; - if (!cpump) { /* Paiext_map allocated? */ - rc = -ENOMEM; - cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); - if (!cpump) - goto undo; - - /* Allocate memory for counter area and counter extraction. - * These are - * - a 512 byte block and requires 512 byte boundary alignment. - * - a 1KB byte block and requires 1KB boundary alignment. - * Only the first counting event has to allocate the area. - * - * Note: This works with commit 59bb47985c1d by default. - * Backporting this to kernels without this commit might - * need adjustment. - */ - mp->mapptr = cpump; - cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); - cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); - cpump->save = kvmalloc_array(paiext_cnt + 1, - sizeof(struct pai_userdata), - GFP_KERNEL); - if (!cpump->save || !cpump->area || !cpump->paiext_cb) { - paiext_free(mp); - goto undo; - } - INIT_LIST_HEAD(&cpump->syswide_list); - refcount_set(&cpump->refcnt, 1); - rc = 0; - } else { - refcount_inc(&cpump->refcnt); - } - -undo: - if (rc) { - /* Error in allocation of event, decrement anchor. Since - * the event in not created, its destroy() function is never - * invoked. Adjust the reference counter for the anchor. - */ - paiext_root_free(); - } -unlock: - mutex_unlock(&paiext_reserve_mutex); - /* If rc is non-zero, no increment of counter/sampler was done. 
*/ - return rc; -} - -static int paiext_alloc(struct perf_event *event) -{ - struct cpumask *maskptr; - int cpu, rc = -ENOMEM; - - maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); - if (!maskptr) - goto out; - - for_each_online_cpu(cpu) { - rc = paiext_alloc_cpu(event, cpu); - if (rc) { - for_each_cpu(cpu, maskptr) - paiext_event_destroy_cpu(event, cpu); - kfree(maskptr); - goto out; - } - cpumask_set_cpu(cpu, maskptr); - } - - /* - * On error all cpumask are freed and all events have been destroyed. - * Save of which CPUs data structures have been allocated for. - * Release them in paicrypt_event_destroy call back function - * for this event. - */ - PAI_CPU_MASK(event) = maskptr; - rc = 0; -out: - return rc; -} - -/* The PAI extension 1 control block supports up to 128 entries. Return - * the index within PAIE1_CB given the event number. Also validate event - * number. - */ -static int paiext_event_valid(struct perf_event *event) -{ - u64 cfg = event->attr.config; - - if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { - /* Offset NNPA in paiext_cb */ - event->hw.config_base = offsetof(struct paiext_cb, acc); - return 0; - } - return -EINVAL; -} - -/* Might be called on different CPU than the one the event is intended for. */ -static int paiext_event_init(struct perf_event *event) -{ - struct perf_event_attr *a = &event->attr; - int rc; - - /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ - if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) - return -ENOENT; - /* PAI extension event must be valid and in supported range */ - rc = paiext_event_valid(event); - if (rc) - return rc; - /* Allow only event NNPA_ALL for sampling. */ - if (a->sample_period && a->config != PAI_NNPA_BASE) - return -EINVAL; - /* Prohibit exclude_user event selection */ - if (a->exclude_user) - return -EINVAL; - /* Get a page to store last counter values for sampling */ - if (a->sample_period) { - PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); - if (!PAI_SAVE_AREA(event)) - return -ENOMEM; - } - - if (event->cpu >= 0) - rc = paiext_alloc_cpu(event, event->cpu); - else - rc = paiext_alloc(event); - if (rc) { - free_page(PAI_SAVE_AREA(event)); - return rc; - } - event->destroy = paiext_event_destroy; - - if (a->sample_period) { - a->sample_period = 1; - a->freq = 0; - /* Register for paicrypt_sched_task() to be called */ - event->attach_state |= PERF_ATTACH_SCHED_CB; - /* Add raw data which are the memory mapped counters */ - a->sample_type |= PERF_SAMPLE_RAW; - /* Turn off inheritance */ - a->inherit = 0; - } - - return 0; -} - -static u64 paiext_getctr(unsigned long *area, int nr) -{ - return area[nr]; -} - -/* Read the counter values. Return value from location in buffer. For event - * NNPA_ALL sum up all events. 
- */ -static u64 paiext_getdata(struct perf_event *event) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - u64 sum = 0; - int i; - - if (event->attr.config != PAI_NNPA_BASE) - return paiext_getctr(cpump->area, - event->attr.config - PAI_NNPA_BASE); - - for (i = 1; i <= paiext_cnt; i++) - sum += paiext_getctr(cpump->area, i); - - return sum; -} - -static u64 paiext_getall(struct perf_event *event) -{ - return paiext_getdata(event); -} - -static void paiext_read(struct perf_event *event) -{ - u64 prev, new, delta; - - prev = local64_read(&event->hw.prev_count); - new = paiext_getall(event); - local64_set(&event->hw.prev_count, new); - delta = new - prev; - local64_add(delta, &event->count); -} - -static void paiext_start(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - u64 sum; - - if (!event->attr.sample_period) { /* Counting */ - sum = paiext_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - } else { /* Sampling */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->area, - PAIE1_CTRBLOCK_SZ); - /* Enable context switch callback for system-wide sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); - perf_sched_cb_inc(event->pmu); - } else { - cpump->event = event; - } - } -} - -static int paiext_add(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct paiext_cb *pcb = cpump->paiext_cb; - - if (++cpump->active_events == 1) { - get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb); - pcb->acc = virt_to_phys(cpump->area) | 0x1; - /* Enable CPU instruction lookup for PAIE1 control block */ - local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); - } - if (flags & PERF_EF_START) - paiext_start(event, PERF_EF_RELOAD); - event->hw.state = 0; - return 0; -} - -static void paiext_have_sample(struct perf_event *, struct paiext_map *); -static void paiext_stop(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - - if (!event->attr.sample_period) { /* Counting */ - paiext_read(event); - } else { /* Sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - list_del(PAI_SWLIST(event)); - perf_sched_cb_dec(event->pmu); - } else { - paiext_have_sample(event, cpump); - cpump->event = NULL; - } - } - event->hw.state = PERF_HES_STOPPED; -} - -static void paiext_del(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct paiext_cb *pcb = cpump->paiext_cb; - - paiext_stop(event, PERF_EF_UPDATE); - if (--cpump->active_events == 0) { - /* Disable CPU instruction lookup for PAIE1 control block */ - local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); - pcb->acc = 0; - get_lowcore()->aicd = 0; - } -} - -/* Create raw data and save it in buffer. Returns number of bytes copied. 
- * Saves only positive counter entries of the form
- * 2 bytes: Number of counter
- * 8 bytes: Value of counter
- */
-static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area,
-			  unsigned long *area_old)
-{
-	int i, outidx = 0;
-
-	for (i = 1; i <= paiext_cnt; i++) {
-		u64 val = paiext_getctr(area, i);
-		u64 val_old = paiext_getctr(area_old, i);
-
-		if (val >= val_old)
-			val -= val_old;
-		else
-			val = (~0ULL - val_old) + val + 1;
-		if (val) {
-			userdata[outidx].num = i;
-			userdata[outidx].value = val;
-			outidx++;
-		}
-	}
-	return outidx * sizeof(*userdata);
-}
-
-/* Write sample when one or more counters values are nonzero.
- *
- * Note: The function paiext_sched_task() and paiext_push_sample() are not
- * invoked after function paiext_del() has been called because of function
- * perf_sched_cb_dec().
- * The function paiext_sched_task() and paiext_push_sample() are only
- * called when sampling is active. Function perf_sched_cb_inc()
- * has been invoked to install function paiext_sched_task() as call back
- * to run at context switch time (see paiext_add()).
- *
- * This causes function perf_event_context_sched_out() and
- * perf_event_context_sched_in() to check whether the PMU has installed an
- * sched_task() callback. That callback is not active after paiext_del()
- * returns and has deleted the event on that CPU.
- */
-static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
-			      struct perf_event *event)
-{
-	struct perf_sample_data data;
-	struct perf_raw_record raw;
-	struct pt_regs regs;
-	int overflow;
-
-	/* Setup perf sample */
-	memset(&regs, 0, sizeof(regs));
-	memset(&raw, 0, sizeof(raw));
-	memset(&data, 0, sizeof(data));
-	perf_sample_data_init(&data, 0, event->hw.last_period);
-	if (event->attr.sample_type & PERF_SAMPLE_TID) {
-		data.tid_entry.pid = task_tgid_nr(current);
-		data.tid_entry.tid = task_pid_nr(current);
-	}
-	if (event->attr.sample_type & PERF_SAMPLE_TIME)
-		data.time = event->clock();
-	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
-		data.id = event->id;
-	if (event->attr.sample_type & PERF_SAMPLE_CPU)
-		data.cpu_entry.cpu = smp_processor_id();
-	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-		raw.frag.size = rawsize;
-		raw.frag.data = cpump->save;
-		perf_sample_save_raw_data(&data, &raw);
-	}
-
-	overflow = perf_event_overflow(event, &data, &regs);
-	perf_event_update_userpage(event);
-	/* Save NNPA lowcore area after read in event */
-	memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
-	       PAIE1_CTRBLOCK_SZ);
-	return overflow;
-}
-
-/* Check if there is data to be saved on schedule out of a task. */
-static void paiext_have_sample(struct perf_event *event,
-			       struct paiext_map *cpump)
-{
-	size_t rawsize;
-
-	if (!event)
-		return;
-	rawsize = paiext_copy(cpump->save, cpump->area,
-			      (unsigned long *)PAI_SAVE_AREA(event));
-	if (rawsize)		/* Incremented counters */
-		paiext_push_sample(rawsize, cpump, event);
-}
-
-/* Check if there is data to be saved on schedule out of a task. */
-static void paiext_have_samples(void)
-{
-	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
-	struct paiext_map *cpump = mp->mapptr;
-	struct perf_event *event;
-
-	list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
-		paiext_have_sample(event, cpump);
-}
-
-/* Called on schedule-in and schedule-out. No access to event structure,
- * but for sampling only event NNPA_ALL is allowed.
- */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) -{ - /* We started with a clean page on event installation. So read out - * results on schedule_out and if page was dirty, save old values. - */ - if (!sched_in) - paiext_have_samples(); -} - -/* Attribute definitions for pai extension1 interface. As with other CPU - * Measurement Facilities, there is one attribute per mapped counter. - * The number of mapped counters may vary per machine generation. Use - * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction - * to determine the number of mapped counters. The instructions returns - * a positive number, which is the highest number of supported counters. - * All counters less than this number are also supported, there are no - * holes. A returned number of zero means no support for mapped counters. - * - * The identification of the counter is a unique number. The chosen range - * is 0x1800 + offset in mapped kernel page. - * All CPU Measurement Facility counters identifiers must be unique and - * the numbers from 0 to 496 are already used for the CPU Measurement - * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography - * counters. - * Numbers 0xb0000, 0xbc000 and 0xbd000 are already - * used for the CPU Measurement Sampling facility. - */ -PMU_FORMAT_ATTR(event, "config:0-63"); - -static struct attribute *paiext_format_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group paiext_events_group = { - .name = "events", - .attrs = NULL, /* Filled in attr_event_init() */ -}; - -static struct attribute_group paiext_format_group = { - .name = "format", - .attrs = paiext_format_attr, -}; - -static const struct attribute_group *paiext_attr_groups[] = { - &paiext_events_group, - &paiext_format_group, - NULL, -}; - -/* Performance monitoring unit for mapped counters */ -static struct pmu paiext = { - .task_ctx_nr = perf_hw_context, - .event_init = paiext_event_init, - .add = paiext_add, - .del = paiext_del, - .start = paiext_start, - .stop = paiext_stop, - .read = paiext_read, - .sched_task = paiext_sched_task, - .attr_groups = paiext_attr_groups, -}; - -/* List of symbolic PAI extension 1 NNPA counter names. 
*/ -static const char * const paiext_ctrnames[] = { - [0] = "NNPA_ALL", - [1] = "NNPA_ADD", - [2] = "NNPA_SUB", - [3] = "NNPA_MUL", - [4] = "NNPA_DIV", - [5] = "NNPA_MIN", - [6] = "NNPA_MAX", - [7] = "NNPA_LOG", - [8] = "NNPA_EXP", - [9] = "NNPA_IBM_RESERVED_9", - [10] = "NNPA_RELU", - [11] = "NNPA_TANH", - [12] = "NNPA_SIGMOID", - [13] = "NNPA_SOFTMAX", - [14] = "NNPA_BATCHNORM", - [15] = "NNPA_MAXPOOL2D", - [16] = "NNPA_AVGPOOL2D", - [17] = "NNPA_LSTMACT", - [18] = "NNPA_GRUACT", - [19] = "NNPA_CONVOLUTION", - [20] = "NNPA_MATMUL_OP", - [21] = "NNPA_MATMUL_OP_BCAST23", - [22] = "NNPA_SMALLBATCH", - [23] = "NNPA_LARGEDIM", - [24] = "NNPA_SMALLTENSOR", - [25] = "NNPA_1MFRAME", - [26] = "NNPA_2GFRAME", - [27] = "NNPA_ACCESSEXCEPT", - [28] = "NNPA_TRANSFORM", - [29] = "NNPA_GELU", - [30] = "NNPA_MOMENTS", - [31] = "NNPA_LAYERNORM", - [32] = "NNPA_MATMUL_OP_BCAST1", - [33] = "NNPA_SQRT", - [34] = "NNPA_INVSQRT", - [35] = "NNPA_NORM", - [36] = "NNPA_REDUCE", -}; - -static void __init attr_event_free(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - struct device_attribute *dap; - int i; - - for (i = 0; i < num; i++) { - dap = container_of(attrs[i], struct device_attribute, attr); - pa = container_of(dap, struct perf_pmu_events_attr, attr); - kfree(pa); - } - kfree(attrs); -} - -static int __init attr_event_init_one(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - - /* Index larger than array_size, no counter name available */ - if (num >= ARRAY_SIZE(paiext_ctrnames)) { - attrs[num] = NULL; - return 0; - } - - pa = kzalloc(sizeof(*pa), GFP_KERNEL); - if (!pa) - return -ENOMEM; - - sysfs_attr_init(&pa->attr.attr); - pa->id = PAI_NNPA_BASE + num; - pa->attr.attr.name = paiext_ctrnames[num]; - pa->attr.attr.mode = 0444; - pa->attr.show = cpumf_events_sysfs_show; - pa->attr.store = NULL; - attrs[num] = &pa->attr.attr; - return 0; -} - -/* Create PMU sysfs event attributes on the fly. 
*/ -static int __init attr_event_init(void) -{ - struct attribute **attrs; - int ret, i; - - attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - for (i = 0; i <= paiext_cnt; i++) { - ret = attr_event_init_one(attrs, i); - if (ret) { - attr_event_free(attrs, i); - return ret; - } - } - attrs[i] = NULL; - paiext_events_group.attrs = attrs; - return 0; -} - -static int __init paiext_init(void) -{ - struct qpaci_info_block ib; - int rc = -ENOMEM; - - if (!test_facility(197)) - return 0; - - qpaci(&ib); - paiext_cnt = ib.num_nnpa; - if (paiext_cnt >= PAI_NNPA_MAXCTR) - paiext_cnt = PAI_NNPA_MAXCTR; - if (!paiext_cnt) - return 0; - - rc = attr_event_init(); - if (rc) { - pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); - return rc; - } - - /* Setup s390dbf facility */ - paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); - if (!paiext_dbg) { - pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); - rc = -ENOMEM; - goto out_init; - } - debug_register_view(paiext_dbg, &debug_sprintf_view); - - rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); - if (rc) { - pr_err("Registration of " KMSG_COMPONENT " PMU failed with " - "rc=%i\n", rc); - goto out_pmu; - } - - return 0; - -out_pmu: - debug_unregister_view(paiext_dbg, &debug_sprintf_view); - debug_unregister(paiext_dbg); -out_init: - attr_event_free(paiext_events_group.attrs, - ARRAY_SIZE(paiext_ctrnames) + 1); - return rc; -} - -device_initcall(paiext_init); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index a6b058ee4a36..7b305f1456f8 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -44,9 +44,6 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_31BIT)) - return PERF_SAMPLE_REGS_ABI_32; - return PERF_SAMPLE_REGS_ABI_64; } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9637aee43c40..0df95dcb2101 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -24,10 +24,8 @@ #include <linux/tick.h> #include <linux/personality.h> #include <linux/syscalls.h> -#include <linux/compat.h> #include <linux/kprobes.h> #include <linux/random.h> -#include <linux/export.h> #include <linux/init_task.h> #include <linux/entry-common.h> #include <linux/io.h> @@ -107,7 +105,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { - unsigned long clone_flags = args->flags; + u64 clone_flags = args->flags; unsigned long new_stackp = args->stack; unsigned long tls = args->tls; struct fake_frame @@ -167,12 +165,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Set a new TLS ? 
*/ if (clone_flags & CLONE_SETTLS) { - if (is_compat_task()) { - p->thread.acrs[0] = (unsigned int)tls; - } else { - p->thread.acrs[0] = (unsigned int)(tls >> 32); - p->thread.acrs[1] = (unsigned int)tls; - } + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; } /* * s390 stores the svc return address in arch_data when calling diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..e33a3eccda56 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -4,10 +4,10 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> @@ -19,6 +19,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <asm/text-patching.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -72,7 +73,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) @@ -209,14 +210,14 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_DFP; /* huge page support */ - if (MACHINE_HAS_EDAT1) + if (cpu_has_edat1()) elf_hwcap |= HWCAP_HPAGE; /* 64-bit register support for 31-bit processes */ elf_hwcap |= HWCAP_HIGH_GPRS; /* transactional execution */ - if (MACHINE_HAS_TE) + if (machine_has_tx()) elf_hwcap |= HWCAP_TE; /* vector */ @@ -244,10 +245,10 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_NNPA; /* guarded storage */ - if (MACHINE_HAS_GS) + if (cpu_has_gs()) elf_hwcap |= HWCAP_GS; - if (MACHINE_HAS_PCI_MIO) + if (test_machine_feature(MFEATURE_PCI_MIO)) elf_hwcap |= HWCAP_PCI_MIO; /* virtualization support */ @@ -266,31 +267,35 @@ static int __init setup_elf_platform(void) add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { default: /* Use "z10" as default. 
*/ - strcpy(elf_platform, "z10"); + strscpy(elf_platform, "z10"); break; case 0x2817: case 0x2818: - strcpy(elf_platform, "z196"); + strscpy(elf_platform, "z196"); break; case 0x2827: case 0x2828: - strcpy(elf_platform, "zEC12"); + strscpy(elf_platform, "zEC12"); break; case 0x2964: case 0x2965: - strcpy(elf_platform, "z13"); + strscpy(elf_platform, "z13"); break; case 0x3906: case 0x3907: - strcpy(elf_platform, "z14"); + strscpy(elf_platform, "z14"); break; case 0x8561: case 0x8562: - strcpy(elf_platform, "z15"); + strscpy(elf_platform, "z15"); break; case 0x3931: case 0x3932: - strcpy(elf_platform, "z16"); + strscpy(elf_platform, "z16"); + break; + case 0x9175: + case 0x9176: + strscpy(elf_platform, "z17"); break; } return 0; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1cfed8b710b8..ceaa1726e328 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,10 +7,10 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -22,7 +22,6 @@ #include <linux/elf.h> #include <linux/regset.h> #include <linux/seccomp.h> -#include <linux/compat.h> #include <trace/syscall.h> #include <asm/guarded_storage.h> #include <asm/access-regs.h> @@ -31,14 +30,13 @@ #include <asm/unistd.h> #include <asm/runtime_instr.h> #include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> #include <asm/fpu.h> #include "entry.h" -#ifdef CONFIG_COMPAT -#include "compat_ptrace.h" -#endif - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -60,7 +58,7 @@ void update_cr_regs(struct task_struct *task) cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (machine_has_tx()) { /* Set or clear transaction execution TXC bit 8. */ cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) @@ -75,7 +73,7 @@ void update_cr_regs(struct task_struct *task) } } /* Take care of enable/disable of guarded storage. */ - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { cr2_new.gse = 0; if (task->thread.gs_cb) cr2_new.gse = 1; @@ -470,18 +468,18 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GET_LAST_BREAK: return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; case PTRACE_TE_ABORT_RAND: - if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) return -EIO; switch (data) { case 0UL: @@ -504,308 +502,6 @@ long arch_ptrace(struct task_struct *child, long request, } } -#ifdef CONFIG_COMPAT -/* - * Now the fun part starts... a 31 bit program running in the - * 31 bit emulation tracing another program. PTRACE_PEEKTEXT, - * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy - * to handle, the difference to the 64 bit versions of the requests - * is that the access is done in multiples of 4 byte instead of - * 8 bytes (sizeof(unsigned long) on 31/64 bit). 
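The deleted comment above captures the core difference of the compat requests: the USER area is walked in 4-byte instead of 8-byte units. A small sketch of such a word-size parameterized walk (user-space, with the value assembled big-endian as on s390; all names are made up):

#include <stdint.h>
#include <stdio.h>

/* The compat requests above step through the target in 4-byte words,
 * the native ones in 8-byte words; the loop is otherwise identical. */
static void peek_area(const unsigned char *base, size_t len, size_t word)
{
	for (size_t off = 0; off + word <= len; off += word) {
		uint64_t val = 0;

		for (size_t i = 0; i < word; i++)	/* big-endian */
			val = (val << 8) | base[off + i];
		printf("+%zu: %#llx\n", off, (unsigned long long)val);
	}
}

int main(void)
{
	unsigned char area[16] = { [7] = 1, [15] = 2 };

	peek_area(area, sizeof(area), 4);	/* 31-bit view: four words */
	peek_area(area, sizeof(area), 8);	/* 64-bit view: two words */
	return 0;
}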
- * The ugly parts are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
- * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
- * is a 31 bit program too, the content of struct user can be
- * emulated. A 31 bit program peeking into the struct user of
- * a 64 bit program is a no-no.
- */
-
-/*
- * Same as peek_user_per but for a 31 bit program.
- */
-static inline __u32 __peek_user_per_compat(struct task_struct *child,
-					   addr_t addr)
-{
-	if (addr == offsetof(struct compat_per_struct_kernel, cr9))
-		/* Control bits of the active per set. */
-		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
-			PER_EVENT_IFETCH : child->thread.per_user.control;
-	else if (addr == offsetof(struct compat_per_struct_kernel, cr10))
-		/* Start address of the active per set. */
-		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
-			0 : child->thread.per_user.start;
-	else if (addr == offsetof(struct compat_per_struct_kernel, cr11))
-		/* End address of the active per set. */
-		return test_thread_flag(TIF_SINGLE_STEP) ?
-			PSW32_ADDR_INSN : child->thread.per_user.end;
-	else if (addr == offsetof(struct compat_per_struct_kernel, bits))
-		/* Single-step bit. */
-		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
-			0x80000000 : 0;
-	else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
-		/* Start address of the user specified per set. */
-		return (__u32) child->thread.per_user.start;
-	else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
-		/* End address of the user specified per set. */
-		return (__u32) child->thread.per_user.end;
-	else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid))
-		/* PER code, ATMID and AI of the last PER trap */
-		return (__u32) child->thread.per_event.cause << 16;
-	else if (addr == offsetof(struct compat_per_struct_kernel, address))
-		/* Address of the last PER trap */
-		return (__u32) child->thread.per_event.address;
-	else if (addr == offsetof(struct compat_per_struct_kernel, access_id))
-		/* Access id of the last PER trap */
-		return (__u32) child->thread.per_event.paid << 24;
-	return 0;
-}
-
-/*
- * Same as peek_user but for a 31 bit program.
- */
-static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
-{
-	addr_t offset;
-	__u32 tmp;
-
-	if (addr < offsetof(struct compat_user, regs.acrs)) {
-		struct pt_regs *regs = task_pt_regs(child);
-		/*
-		 * psw and gprs are stored on the stack
-		 */
-		if (addr == offsetof(struct compat_user, regs.psw.mask)) {
-			/* Fake a 31 bit psw mask. */
-			tmp = (__u32)(regs->psw.mask >> 32);
-			tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
-			tmp |= PSW32_USER_BITS;
-		} else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
-			/* Fake a 31 bit psw address. */
-			tmp = (__u32) regs->psw.addr |
-				(__u32)(regs->psw.mask & PSW_MASK_BA);
-		} else {
-			/* gpr 0-15 */
-			tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
-		}
-	} else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
-		/*
-		 * access registers are stored in the thread structure
-		 */
-		offset = addr - offsetof(struct compat_user, regs.acrs);
-		tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
-
-	} else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
-		/*
-		 * orig_gpr2 is stored on the kernel stack
-		 */
-		tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
-
-	} else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
-		/*
-		 * prevent reads of padding hole between
-		 * orig_gpr2 and fp_regs on s390.
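The 31-bit peek/poke helpers here fake and validate PSW fields with an XOR-and-mask idiom: a value is acceptable if it differs from the PSW32_USER_BITS template only in user-changeable bits. A sketch of that test with made-up bit values (not the real PSW32 definitions):

#include <stdint.h>
#include <stdio.h>

#define USER_BITS	0x00080000u	/* template bits that must be set */
#define USER_MASK	0xf0000000u	/* bits user space may change */

/* Accept 'val' only if every bit outside USER_MASK matches the
 * template: the (val ^ template) & ~mask test used in the code. */
static int mask_valid(uint32_t val)
{
	return ((val ^ USER_BITS) & ~USER_MASK) == 0;
}

int main(void)
{
	printf("%d %d\n",
	       mask_valid(USER_BITS | 0x80000000u),	/* 1: changeable bit */
	       mask_valid(USER_BITS | 0x00000001u));	/* 0: forbidden bit */
	return 0;
}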
- */ - tmp = 0; - - } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { - /* - * floating point control reg. is in the thread structure - */ - tmp = child->thread.ufpu.fpc; - - } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { - /* - * floating point regs. are in the child->thread.ufpu.vxrs array - */ - offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); - tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset); - } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { - /* - * Handle access to the per_info structure. - */ - addr -= offsetof(struct compat_user, regs.per_info); - tmp = __peek_user_per_compat(child, addr); - - } else - tmp = 0; - - return tmp; -} - -static int peek_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - __u32 tmp; - - if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) - return -EIO; - - tmp = __peek_user_compat(child, addr); - return put_user(tmp, (__u32 __user *) data); -} - -/* - * Same as poke_user_per but for a 31 bit program. - */ -static inline void __poke_user_per_compat(struct task_struct *child, - addr_t addr, __u32 data) -{ - if (addr == offsetof(struct compat_per_struct_kernel, cr9)) - /* PER event mask of the user specified per set. */ - child->thread.per_user.control = - data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) - /* Starting address of the user specified per set. */ - child->thread.per_user.start = data; - else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) - /* Ending address of the user specified per set. */ - child->thread.per_user.end = data; -} - -/* - * Same as poke_user but for a 31 bit program. - */ -static int __poke_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - __u32 tmp = (__u32) data; - addr_t offset; - - if (addr < offsetof(struct compat_user, regs.acrs)) { - struct pt_regs *regs = task_pt_regs(child); - /* - * psw, gprs, acrs and orig_gpr2 are stored on the stack - */ - if (addr == offsetof(struct compat_user, regs.psw.mask)) { - __u32 mask = PSW32_MASK_USER; - - mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; - /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp ^ PSW32_USER_BITS) & ~mask) - /* Invalid psw mask. */ - return -EINVAL; - if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) - /* Invalid address-space-control bits */ - return -EINVAL; - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (regs->psw.mask & PSW_MASK_BA) | - (__u64)(tmp & mask) << 32; - } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { - /* Build a 64 bit psw address from 31 bit address. */ - regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; - /* Transfer 31 bit amode bit to psw mask. 
*/
-			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
-				(__u64)(tmp & PSW32_ADDR_AMODE);
-		} else {
-			if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
-			    addr == offsetof(struct compat_user, regs.gprs[2])) {
-				struct pt_regs *regs = task_pt_regs(child);
-
-				regs->int_code = 0x20000 | (data & 0xffff);
-			}
-			/* gpr 0-15 */
-			*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
-		}
-	} else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
-		/*
-		 * access registers are stored in the thread structure
-		 */
-		offset = addr - offsetof(struct compat_user, regs.acrs);
-		*(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
-
-	} else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
-		/*
-		 * orig_gpr2 is stored on the kernel stack
-		 */
-		*(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
-
-	} else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
-		/*
-		 * prevent writes of padding hole between
-		 * orig_gpr2 and fp_regs on s390.
-		 */
-		return 0;
-
-	} else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
-		/*
-		 * floating point control reg. is in the thread structure
-		 */
-		child->thread.ufpu.fpc = data;
-
-	} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
-		/*
-		 * floating point regs. are in the child->thread.ufpu.vxrs array
-		 */
-		offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
-		*(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
-	} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
-		/*
-		 * Handle access to the per_info structure.
-		 */
-		addr -= offsetof(struct compat_user, regs.per_info);
-		__poke_user_per_compat(child, addr, data);
-	}
-
-	return 0;
-}
-
-static int poke_user_compat(struct task_struct *child,
-			    addr_t addr, addr_t data)
-{
-	if (!is_compat_task() || (addr & 3) ||
-	    addr > sizeof(struct compat_user) - 3)
-		return -EIO;
-
-	return __poke_user_compat(child, addr, data);
-}
-
-long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
-			compat_ulong_t caddr, compat_ulong_t cdata)
-{
-	unsigned long addr = caddr;
-	unsigned long data = cdata;
-	compat_ptrace_area parea;
-	int copied, ret;
-
-	switch (request) {
-	case PTRACE_PEEKUSR:
-		/* read the word at location addr in the USER area. */
-		return peek_user_compat(child, addr, data);
-
-	case PTRACE_POKEUSR:
-		/* write the word at location addr in the USER area */
-		return poke_user_compat(child, addr, data);
-
-	case PTRACE_PEEKUSR_AREA:
-	case PTRACE_POKEUSR_AREA:
-		if (copy_from_user(&parea, (void __force __user *) addr,
-				   sizeof(parea)))
-			return -EFAULT;
-		addr = parea.kernel_addr;
-		data = parea.process_addr;
-		copied = 0;
-		while (copied < parea.len) {
-			if (request == PTRACE_PEEKUSR_AREA)
-				ret = peek_user_compat(child, addr, data);
-			else {
-				__u32 utmp;
-				if (get_user(utmp,
-					     (__u32 __force __user *) data))
-					return -EFAULT;
-				ret = poke_user_compat(child, addr, utmp);
-			}
-			if (ret)
-				return ret;
-			addr += sizeof(unsigned int);
-			data += sizeof(unsigned int);
-			copied += sizeof(unsigned int);
-		}
-		return 0;
-	case PTRACE_GET_LAST_BREAK:
-		return put_user(child->thread.last_break, (unsigned int __user *)data);
-	}
-	return compat_ptrace_request(child, request, addr, data);
-}
-#endif
-
 /*
  * user_regset definitions.
*/ @@ -1033,7 +729,7 @@ static int s390_gs_cb_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1050,7 +746,7 @@ static int s390_gs_cb_set(struct task_struct *target, struct gs_cb gs_cb = { }, *data = NULL; int rc; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1087,7 +783,7 @@ static int s390_gs_bc_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1101,7 +797,7 @@ static int s390_gs_bc_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1206,7 +902,7 @@ static int s390_runtime_instr_set(struct task_struct *target, static const struct user_regset s390_regsets[] = { { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1214,7 +910,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_regs_set, }, { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1222,7 +918,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_fpregs_set, }, { - .core_note_type = NT_S390_SYSTEM_CALL, + USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL), .n = 1, .size = sizeof(unsigned int), .align = sizeof(unsigned int), @@ -1230,7 +926,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_system_call_set, }, { - .core_note_type = NT_S390_LAST_BREAK, + USER_REGSET_NOTE_TYPE(S390_LAST_BREAK), .n = 1, .size = sizeof(long), .align = sizeof(long), @@ -1238,7 +934,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_last_break_set, }, { - .core_note_type = NT_S390_TDB, + USER_REGSET_NOTE_TYPE(S390_TDB), .n = 1, .size = 256, .align = 1, @@ -1246,7 +942,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_tdb_set, }, { - .core_note_type = NT_S390_VXRS_LOW, + USER_REGSET_NOTE_TYPE(S390_VXRS_LOW), .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), @@ -1254,7 +950,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_vxrs_low_set, }, { - .core_note_type = NT_S390_VXRS_HIGH, + USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH), .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), @@ -1262,7 +958,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_vxrs_high_set, }, { - .core_note_type = NT_S390_GS_CB, + USER_REGSET_NOTE_TYPE(S390_GS_CB), .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1270,7 +966,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_gs_cb_set, }, { - .core_note_type = NT_S390_GS_BC, + USER_REGSET_NOTE_TYPE(S390_GS_BC), .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1278,7 +974,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_gs_bc_set, }, { - .core_note_type = NT_S390_RI_CB, + USER_REGSET_NOTE_TYPE(S390_RI_CB), .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1294,225 +990,8 @@ static const struct user_regset_view user_s390_view = { .n = 
ARRAY_SIZE(s390_regsets) }; -#ifdef CONFIG_COMPAT -static int s390_compat_regs_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - unsigned n; - - if (target == current) - save_access_regs(target->thread.acrs); - - for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t)) - membuf_store(&to, __peek_user_compat(target, n)); - return 0; -} - -static int s390_compat_regs_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int rc = 0; - - if (target == current) - save_access_regs(target->thread.acrs); - - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count > 0 && !rc) { - rc = __poke_user_compat(target, pos, *k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count > 0 && !rc) { - compat_ulong_t word; - rc = __get_user(word, u++); - if (rc) - break; - rc = __poke_user_compat(target, pos, word); - count -= sizeof(*u); - pos += sizeof(*u); - } - } - - if (rc == 0 && target == current) - restore_access_regs(target->thread.acrs); - - return rc; -} - -static int s390_compat_regs_high_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - compat_ulong_t *gprs_high; - int i; - - gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs; - for (i = 0; i < NUM_GPRS; i++, gprs_high += 2) - membuf_store(&to, *gprs_high); - return 0; -} - -static int s390_compat_regs_high_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - compat_ulong_t *gprs_high; - int rc = 0; - - gprs_high = (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count > 0) { - *gprs_high = *k++; - *gprs_high += 2; - count -= sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count > 0 && !rc) { - unsigned long word; - rc = __get_user(word, u++); - if (rc) - break; - *gprs_high = word; - *gprs_high += 2; - count -= sizeof(*u); - } - } - - return rc; -} - -static int s390_compat_last_break_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - compat_ulong_t last_break = target->thread.last_break; - - return membuf_store(&to, (unsigned long)last_break); -} - -static int s390_compat_last_break_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return 0; -} - -static const struct user_regset s390_compat_regsets[] = { - { - .core_note_type = NT_PRSTATUS, - .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .regset_get = s390_compat_regs_get, - .set = s390_compat_regs_set, - }, - { - .core_note_type = NT_PRFPREG, - .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .regset_get = s390_fpregs_get, - .set = s390_fpregs_set, - }, - { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(compat_uint_t), - .align = sizeof(compat_uint_t), - .regset_get = s390_system_call_get, - .set = s390_system_call_set, - }, - { - .core_note_type = NT_S390_LAST_BREAK, - .n = 1, - .size = sizeof(long), - .align = sizeof(long), - .regset_get = s390_compat_last_break_get, - .set = 
s390_compat_last_break_set, - }, - { - .core_note_type = NT_S390_TDB, - .n = 1, - .size = 256, - .align = 1, - .regset_get = s390_tdb_get, - .set = s390_tdb_set, - }, - { - .core_note_type = NT_S390_VXRS_LOW, - .n = __NUM_VXRS_LOW, - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_vxrs_low_get, - .set = s390_vxrs_low_set, - }, - { - .core_note_type = NT_S390_VXRS_HIGH, - .n = __NUM_VXRS_HIGH, - .size = sizeof(__vector128), - .align = sizeof(__vector128), - .regset_get = s390_vxrs_high_get, - .set = s390_vxrs_high_set, - }, - { - .core_note_type = NT_S390_HIGH_GPRS, - .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .regset_get = s390_compat_regs_high_get, - .set = s390_compat_regs_high_set, - }, - { - .core_note_type = NT_S390_GS_CB, - .n = sizeof(struct gs_cb) / sizeof(__u64), - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_gs_cb_get, - .set = s390_gs_cb_set, - }, - { - .core_note_type = NT_S390_GS_BC, - .n = sizeof(struct gs_cb) / sizeof(__u64), - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_gs_bc_get, - .set = s390_gs_bc_set, - }, - { - .core_note_type = NT_S390_RI_CB, - .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_runtime_instr_get, - .set = s390_runtime_instr_set, - }, -}; - -static const struct user_regset_view user_s390_compat_view = { - .name = "s390", - .e_machine = EM_S390, - .regsets = s390_compat_regsets, - .n = ARRAY_SIZE(s390_compat_regsets) -}; -#endif - const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) - return &user_s390_compat_view; -#endif return &user_s390_view; } @@ -1521,13 +1000,6 @@ static const char *gpr_names[NUM_GPRS] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", }; -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) -{ - if (offset >= NUM_GPRS) - return 0; - return regs->gprs[offset]; -} - int regs_query_register_offset(const char *name) { unsigned long offset; @@ -1547,29 +1019,3 @@ const char *regs_query_register_name(unsigned int offset) return NULL; return gpr_names[offset]; } - -static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) -{ - unsigned long ksp = kernel_stack_pointer(regs); - - return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); -} - -/** - * regs_get_kernel_stack_nth() - get Nth entry of the stack - * @regs:pt_regs which contains kernel stack pointer. - * @n:stack entry number. - * - * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, - * this returns 0. 
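The kernel-doc above describes the bounds check precisely: because the kernel stack is one THREAD_SIZE-aligned block, an address lies on the same stack as the stack pointer iff both agree in all bits above the size mask. A user-space sketch, assuming a power-of-two 16 KiB stack and that the demo array does not straddle an alignment boundary:

#include <stdint.h>
#include <stdio.h>

#define STACK_SIZE	(1UL << 14)	/* assumed 16 KiB, power of two */

/* Same trick as regs_within_kernel_stack(): masking off the low bits
 * yields the base of the aligned block an address belongs to. */
static int within_stack(uintptr_t sp, uintptr_t addr)
{
	return (addr & ~(STACK_SIZE - 1)) == (sp & ~(STACK_SIZE - 1));
}

/* Analog of regs_get_kernel_stack_nth(): nth slot above sp, or 0. */
static unsigned long stack_nth(uintptr_t sp, unsigned int n)
{
	uintptr_t addr = sp + n * sizeof(unsigned long);

	if (!within_stack(sp, addr))
		return 0;
	return *(unsigned long *)addr;
}

int main(void)
{
	unsigned long frame[4] = { 1, 2, 3, 4 };

	printf("%lu\n", stack_nth((uintptr_t)frame, 2));	/* prints 3 */
	return 0;
}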
- */ -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) -{ - unsigned long addr; - - addr = kernel_stack_pointer(regs) + n * sizeof(long); - if (!regs_within_kernel_stack(regs, addr)) - return 0; - return *(unsigned long *)addr; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a3fea683b227..c1fe0b53c5ac 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -13,8 +13,7 @@ * This file handles the architecture-dependent parts of initialization */ -#define KMSG_COMPONENT "setup" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "setup: " fmt #include <linux/errno.h> #include <linux/export.h> @@ -47,13 +46,13 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/memory.h> -#include <linux/compat.h> #include <linux/start_kernel.h> #include <linux/hugetlb.h> #include <linux/kmemleak.h> #include <asm/archrandom.h> #include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> #include <asm/facility.h> #include <asm/smp.h> @@ -111,7 +110,7 @@ struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amod * Because the AMODE31 sections are relocated below 2G at startup, * the content of control registers CR2, CR5 and CR15 must be updated * with new addresses after the relocation. The initial initialization of - * control registers occurs in head64.S and then gets updated again after AMODE31 + * control registers occurs in head.S and then gets updated again after AMODE31 * relocation. We must access the relevant AMODE31 tables indirectly via * pointers placed in the .amode31.refs linker section. Those pointers get * updated automatically during AMODE31 relocation and always contain a valid @@ -157,25 +156,29 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); -unsigned long VMALLOC_START; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); -unsigned long vmemmap_size; +unsigned long __bootdata_preserved(vmemmap_size); -unsigned long MODULES_VADDR; -unsigned long MODULES_END; +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -DEFINE_STATIC_KEY_FALSE(cpu_has_bear); - /* * The Write Back bit position in the physaddr is given by the SLPC PCI. 
* Leaving the mask zero always uses write through which is safe @@ -245,7 +248,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -283,7 +286,7 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_KVM) { + } else if (machine_is_kvm()) { if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) @@ -359,36 +362,24 @@ void *restart_stack; unsigned long stack_alloc(void) { -#ifdef CONFIG_VMAP_STACK - void *ret; + void *stack; - ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, - NUMA_NO_NODE, __builtin_return_address(0)); - kmemleak_not_leak(ret); - return (unsigned long)ret; -#else - return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#endif + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } void stack_free(unsigned long stack) { -#ifdef CONFIG_VMAP_STACK - vfree((void *) stack); -#else - free_pages(stack, THREAD_SIZE_ORDER); -#endif + vfree((void *)stack); } static unsigned long __init stack_alloc_early(void) { unsigned long stack; - stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); - if (!stack) { - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, THREAD_SIZE, THREAD_SIZE); - } + stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE); return stack; } @@ -421,7 +412,6 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = get_lowcore()->machine_flags; lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); lc->sys_enter_timer = get_lowcore()->sys_enter_timer; @@ -512,10 +502,7 @@ static void __init setup_resources(void) bss_resource.end = __pa_symbol(__bss_stop) - 1; for_each_mem_range(i, &start, &end) { - res = memblock_alloc(sizeof(*res), 8); - if (!res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*res), 8); + res = memblock_alloc_or_panic(sizeof(*res), 8); res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; @@ -534,10 +521,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = memblock_alloc(sizeof(*sub_res), 8); - if (!sub_res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*sub_res), 8); + sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; @@ -619,7 +603,7 @@ static void __init reserve_crashkernel(void) int rc; rc = parse_crashkernel(boot_command_line, ident_map_size, - &crash_size, &crash_base, NULL, NULL); + &crash_size, &crash_base, NULL, NULL, NULL); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); @@ -664,7 +648,7 @@ static void __init reserve_crashkernel(void) return; } - if (!oldmem_data.start && MACHINE_IS_VM) + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; 
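With the !CONFIG_VMAP_STACK branch gone, stack_alloc()/stack_free() above always return a THREAD_SIZE-sized, THREAD_SIZE-aligned, virtually mapped block. A rough user-space analog of that alloc/free pairing, with aligned_alloc() standing in for __vmalloc_node():

#include <stdio.h>
#include <stdlib.h>

#define THREAD_SIZE	(1UL << 14)	/* assumed stack size and alignment */

/* Analog of stack_alloc(): one THREAD_SIZE block, THREAD_SIZE-aligned,
 * so the stack base can be recovered from any address on it by masking
 * (see the bounds-check sketch earlier). */
static unsigned long stack_alloc_demo(void)
{
	void *stack = aligned_alloc(THREAD_SIZE, THREAD_SIZE);

	return (unsigned long)stack;
}

static void stack_free_demo(unsigned long stack)
{
	free((void *)stack);
}

int main(void)
{
	unsigned long s = stack_alloc_demo();

	if (s) {
		printf("stack at %#lx, aligned=%d\n", s,
		       (s & (THREAD_SIZE - 1)) == 0);
		stack_free_demo(s);
	}
	return 0;
}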
@@ -704,7 +688,7 @@ static void __init reserve_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_reserve(addr, size); } @@ -712,7 +696,7 @@ static void __init free_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_phys_free(addr, size); } @@ -733,6 +717,11 @@ static void __init memblock_add_physmem_info(void) memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } +static void __init setup_high_memory(void) +{ + high_memory = __va(ident_map_size); +} + /* * Reserve memory used for lowcore. */ @@ -742,7 +731,7 @@ static void __init reserve_lowcore(void) void *lowcore_end = lowcore_start + sizeof(struct lowcore); void *start, *end; - if ((void *)__identity_base < lowcore_end) { + if (absolute_pointer(__identity_base) < lowcore_end) { start = max(lowcore_start, (void *)__identity_base); end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); memblock_reserve(__pa(start), __pa(end)); @@ -824,9 +813,7 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!vmms) - panic("Failed to allocate memory for sysinfo structure\n"); + vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free(vmms, PAGE_SIZE); @@ -850,7 +837,7 @@ static void __init setup_control_program_code(void) return; diag_stat_inc(DIAG_STAT_X318); - asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val)); + asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val)); } /* @@ -886,6 +873,23 @@ static void __init log_component_list(void) } /* + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. + */ +static void __init print_rb_entry(const char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); + + buf = skip_timestamp(printk_skip_level(buf)); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) + return; + + fmt[1] = level; + printk(fmt, buf); +} + +/* * Setup function called from init/main.c just after the banner * was printed. 
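print_rb_entry() above replays buffered decompressor messages: it reads the log level from the two-byte KERN_SOH prefix, drops KERN_DEBUG records unless bootdebug filtering passes, and re-emits the rest at their original level. A simplified user-space sketch of that prefix convention (timestamp skipping and filter matching are omitted):

#include <stdio.h>

#define SOH	'\001'	/* printk KERN_SOH marker */

static int bootdebug;	/* simplified stand-in for the bootdebug flag */

/* Return the level character of a "\001<level>..." record, or 0. */
static char get_level(const char *buf)
{
	return (buf[0] == SOH && buf[1]) ? buf[1] : 0;
}

/* Skip the two-byte level prefix, if present. */
static const char *skip_level(const char *buf)
{
	return get_level(buf) ? buf + 2 : buf;
}

/* Analog of print_rb_entry(): suppress debug records unless enabled. */
static void replay(const char *buf)
{
	char level = get_level(buf);

	if (level == '7' && !bootdebug)	/* '7' is KERN_DEBUG */
		return;
	printf("<%c> %s\n", level ? level : '4', skip_level(buf));
}

int main(void)
{
	replay("\0016Linux version ...");	/* info: printed */
	replay("\0017lowcore details");		/* debug: filtered out */
	return 0;
}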
*/ @@ -895,17 +899,20 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); - if (have_relocated_lowcore()) + if (machine_has_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); log_component_list(); @@ -947,6 +954,7 @@ void __init setup_arch(char **cmdline_p) free_physmem_info(); setup_memory_end(); + setup_high_memory(); memblock_dump_all(); setup_memory(); @@ -955,7 +963,7 @@ void __init setup_arch(char **cmdline_p) setup_uv(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); - if (MACHINE_HAS_EDAT2) + if (cpu_has_edat2()) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); reserve_crashkernel(); @@ -975,10 +983,7 @@ void __init setup_arch(char **cmdline_p) numa_setup(); smp_detect_cpus(); topology_init_early(); - - if (test_facility(193)) - static_branch_enable(&cpu_has_bear); - + setup_protection_map(); /* * Create kernel page tables. */ @@ -1006,3 +1011,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index e48013cd832c..4874de5edea0 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -27,7 +27,6 @@ #include <linux/personality.h> #include <linux/binfmts.h> #include <linux/syscalls.h> -#include <linux/compat.h> #include <asm/ucontext.h> #include <linux/uaccess.h> #include <asm/vdso-symbols.h> @@ -290,12 +289,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, unsigned long restorer; size_t frame_size; - /* - * gprs_high are only present for a 31-bit task running on - * a 64-bit kernel (see compat_signal.c) but the space for - * gprs_high need to be allocated if vector registers are - * included in the signal frame on a 31-bit system. - */ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); if (cpu_has_vx()) frame_size += sizeof(frame->sregs_ext); @@ -333,7 +326,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ka->sa.sa_restorer; else - restorer = VDSO64_SYMBOL(current, sigreturn); + restorer = VDSO_SYMBOL(current, sigreturn); /* Set up registers for signal handler */ regs->gprs[14] = restorer; @@ -367,12 +360,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, size_t frame_size; frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); - /* - * gprs_high are only present for a 31-bit task running on - * a 64-bit kernel (see compat_signal.c) but the space for - * gprs_high need to be allocated if vector registers are - * included in the signal frame on a 31-bit system. 
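setup_frame()/setup_rt_frame() above size the signal frame conditionally: the extended register area is appended only when the vector facility is present (cpu_has_vx() in the kernel). A sketch of that computation with an invented frame layout (the real structs differ):

#include <stddef.h>
#include <stdio.h>

/* Made-up stand-ins for the signal frame and its extension. */
struct sregs_ext { unsigned long vxrs[32]; };
struct sigframe {
	unsigned long base[20];
	struct sregs_ext sregs_ext;
};

/* Mirror of the frame_size logic above: start from the frame without
 * the extension, add it back only when the facility is available. */
static size_t frame_size(int has_vx)
{
	size_t size = sizeof(struct sigframe) - sizeof(struct sregs_ext);

	if (has_vx)
		size += sizeof(struct sregs_ext);
	return size;
}

int main(void)
{
	printf("%zu %zu\n", frame_size(0), frame_size(1));
	return 0;
}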
- */ uc_flags = 0; if (cpu_has_vx()) { frame_size += sizeof(_sigregs_ext); @@ -391,7 +378,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ksig->ka.sa.sa_restorer; else - restorer = VDSO64_SYMBOL(current, rt_sigreturn); + restorer = VDSO_SYMBOL(current, rt_sigreturn); /* Create siginfo on the signal stack */ if (copy_siginfo_to_user(&frame->info, &ksig->info)) @@ -490,10 +477,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs) clear_pt_regs_flag(regs, PIF_SYSCALL); rseq_signal_deliver(&ksig, regs); - if (is_compat_task()) - handle_signal32(&ksig, oldset, regs); - else - handle_signal(&ksig, oldset, regs); + handle_signal(&ksig, oldset, regs); return; } @@ -506,10 +490,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs) /* Restart with sys_restart_syscall */ regs->gprs[2] = regs->orig_gpr2; current->restart_block.arch_data = regs->psw.addr; - if (is_compat_task()) - regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall); - else - regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall); + regs->psw.addr = VDSO_SYMBOL(current, restart_syscall); if (test_thread_flag(TIF_SINGLE_STEP)) clear_thread_flag(TIF_PER_TRAP); break; diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c new file mode 100644 index 000000000000..cc869de6e3a5 --- /dev/null +++ b/arch/s390/kernel/skey.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <asm/rwonce.h> +#include <asm/page.h> +#include <asm/skey.h> + +int skey_regions_initialized; + +static inline unsigned long load_real_address(unsigned long address) +{ + unsigned long real; + + asm volatile( + " lra %[real],0(%[address])" + : [real] "=d" (real) + : [address] "a" (address) + : "cc"); + return real; +} + +/* + * Initialize storage keys of registered memory regions with the + * default key. This is useful for code which is executed with a + * non-default access key. + */ +void __skey_regions_initialize(void) +{ + unsigned long address, real; + struct skey_region *r, *end; + + r = __skey_region_start; + end = __skey_region_end; + while (r < end) { + address = r->start & PAGE_MASK; + do { + real = load_real_address(address); + page_set_storage_key(real, PAGE_DEFAULT_KEY, 1); + address += PAGE_SIZE; + } while (address < r->end); + r++; + } + /* + * Make sure storage keys are initialized before + * skey_regions_initialized is changed. + */ + barrier(); + WRITE_ONCE(skey_regions_initialized, 1); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 822d8e6f8717..b7429f30afc1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -15,9 +15,9 @@ * operates on physical cpu numbers needs to go into smp.c. 
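The new skey.c walks every registered region page by page, translating each virtual page to its real address with lra before setting the default storage key. Storage keys have no user-space equivalent, so this sketch mirrors only the page-granular walk, with a callback standing in for page_set_storage_key():

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

struct region {
	unsigned long start;
	unsigned long end;
};

/* Analog of __skey_regions_initialize(): round start down to a page
 * boundary and visit every page the region touches. */
static void walk_regions(const struct region *r, int n,
			 void (*set_key)(unsigned long page))
{
	for (int i = 0; i < n; i++) {
		unsigned long addr = r[i].start & PAGE_MASK;

		do {
			set_key(addr);	/* page_set_storage_key() above */
			addr += PAGE_SIZE;
		} while (addr < r[i].end);
	}
}

static void show(unsigned long page)
{
	printf("key set for page %#lx\n", page);
}

int main(void)
{
	struct region r[] = { { 0x1100, 0x2800 } };	/* spans two pages */

	walk_regions(r, 1, show);
	return 0;
}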
*/ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/export.h> @@ -38,6 +38,7 @@ #include <linux/kprobes.h> #include <asm/access-regs.h> #include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -97,13 +98,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; static unsigned int smp_max_threads __initdata = -1U; cpumask_t cpu_setup_mask; -static int __init early_nosmt(char *s) -{ - smp_max_threads = 1; - return 0; -} -early_param("nosmt", early_nosmt); - static int __init early_smt(char *s) { get_option(&s, &smp_max_threads); @@ -180,13 +174,10 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) { - int order; - if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) return; - order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; pcpu->ec_clk = get_tod_clock_fast(); - pcpu_sigp_retry(pcpu, order, 0); + pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0); } static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) @@ -263,13 +254,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = get_lowcore()->machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); put_abs_lowcore(abs_lc); - lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[1] = lc->user_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); arch_spin_lock_setup(cpu); @@ -290,6 +280,9 @@ static void pcpu_attach_task(int cpu, struct task_struct *tsk) lc->hardirq_timer = tsk->thread.hardirq_timer; lc->softirq_timer = tsk->thread.softirq_timer; lc->steal_timer = 0; +#ifdef CONFIG_STACKPROTECTOR + lc->stack_canary = tsk->stack_canary; +#endif } static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) @@ -314,9 +307,9 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, int cpu, - pcpu_delegate_fn *func, - void *data, unsigned long stack) +static void __noreturn pcpu_delegate(struct pcpu *pcpu, int cpu, + pcpu_delegate_fn *func, + void *data, unsigned long stack) { struct lowcore *lc, *abs_lc; unsigned int source_cpu; @@ -349,7 +342,7 @@ static void pcpu_delegate(struct pcpu *pcpu, int cpu, "0: sigp 0,%0,%2 # sigp restart to target cpu\n" " brc 2,0b # busy, try again\n" "1: sigp 0,%1,%3 # sigp stop to current cpu\n" - " brc 2,1b # busy, try again\n" + " brc 2,1b # busy, try again" : : "d" (pcpu->address), "d" (source_cpu), "K" (SIGP_RESTART), "K" (SIGP_STOP) : "0", "1", "cc"); @@ -379,7 +372,7 @@ static int pcpu_set_smt(unsigned int mtid) /* * Call function on the ipl CPU. 
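In pcpu_ec_call() above, the SIGP order is now always SIGP_EXTERNAL_CALL, but the test_and_set_bit() guard is unchanged: only the caller that first sets a pending bit sends the signal; later requests for the same bit are coalesced. A C11 sketch of that idiom, with atomic_fetch_or() in place of test_and_set_bit() and a printf in place of the SIGP:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long ec_mask;

/* Analog of pcpu_ec_call(): set the request bit; only the caller that
 * flips it from 0 to 1 actually sends the (simulated) signal. */
static void ec_call(int ec_bit)
{
	unsigned long bit = 1UL << ec_bit;

	if (atomic_fetch_or(&ec_mask, bit) & bit)
		return;		/* already pending, signal coalesced */
	printf("sigp external call for bit %d\n", ec_bit);
}

int main(void)
{
	ec_call(2);	/* sends */
	ec_call(2);	/* coalesced */
	ec_call(3);	/* sends */
	return 0;
}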
*/ -void smp_call_ipl_cpu(void (*func)(void *), void *data) +void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data) { struct lowcore *lc = lowcore_ptr[0]; @@ -416,7 +409,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void notrace smp_yield_cpu(int cpu) { - if (!MACHINE_HAS_DIAG9C) + if (!machine_has_diag9c()) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" @@ -439,16 +432,16 @@ void notrace smp_emergency_stop(void) cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); - end = get_tod_clock() + (1000000UL << 12); + end = get_tod_clock_monotonic() + (1000000UL << 12); for_each_cpu(cpu, &cpumask) { struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && - get_tod_clock() < end) + get_tod_clock_monotonic() < end) cpu_relax(); } - while (get_tod_clock() < end) { + while (get_tod_clock_monotonic() < end) { for_each_cpu(cpu, &cpumask) if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) cpumask_clear_cpu(cpu, &cpumask); @@ -561,10 +554,10 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!cpu_has_vx() && !MACHINE_HAS_GS) + if (!cpu_has_vx() && !cpu_has_gs()) return 0; pa = lc->mcesad & MCESA_ORIGIN_MASK; - if (MACHINE_HAS_GS) + if (cpu_has_gs()) pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) @@ -611,9 +604,7 @@ void __init smp_save_dump_ipl_cpu(void) if (!dump_available()) return; sa = save_area_alloc(true); - regs = memblock_alloc(512, 8); - if (!sa || !regs) - panic("could not allocate memory for boot CPU save area\n"); + regs = memblock_alloc_or_panic(512, 8); copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); save_area_add_regs(sa, regs); memblock_free(regs, 512); @@ -646,8 +637,6 @@ void __init smp_save_dump_secondary_cpus(void) SIGP_CC_NOT_OPERATIONAL) continue; sa = save_area_alloc(false); - if (!sa) - panic("could not allocate memory for save area\n"); __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); save_area_add_regs(sa, page); if (cpu_has_vx()) { @@ -710,6 +699,7 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) continue; info->core[info->configured].core_id = address >> smp_cpu_mt_shift; + info->core[info->configured].type = boot_core_type; info->configured++; } info->combined = info->configured; @@ -792,10 +782,7 @@ void __init smp_detect_cpus(void) u16 address; /* Get CPU information */ - info = memblock_alloc(sizeof(*info), 8); - if (!info) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*info), 8); + info = memblock_alloc_or_panic(sizeof(*info), 8); smp_get_core_info(info, 1); /* Find boot CPU type */ if (sclp.has_core_type) { @@ -814,6 +801,7 @@ void __init smp_detect_cpus(void) mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? 
mtid : smp_max_threads - 1; pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); /* Print number of CPUs */ c_cpus = s_cpus = 0; diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c new file mode 100644 index 000000000000..d4e40483f008 --- /dev/null +++ b/arch/s390/kernel/stackprotector.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef pr_fmt +#define pr_fmt(fmt) "stackprot: " fmt +#endif + +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/printk.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> +#include <asm/machine.h> +#include <asm/asm-offsets.h> +#include <asm/arch-stackprotector.h> + +#ifdef __DECOMPRESSOR + +#define DEBUGP boot_debug +#define EMERGP boot_emerg +#define PANIC boot_panic + +#else /* __DECOMPRESSOR */ + +#define DEBUGP pr_debug +#define EMERGP pr_emerg +#define PANIC panic + +#endif /* __DECOMPRESSOR */ + +int __bootdata_preserved(stack_protector_debug); + +unsigned long __stack_chk_guard; +EXPORT_SYMBOL(__stack_chk_guard); + +struct insn_ril { + u8 opc1 : 8; + u8 r1 : 4; + u8 opc2 : 4; + u32 imm; +} __packed; + +/* + * Convert a virtual instruction address to a real instruction address. The + * decompressor needs to patch instructions within the kernel image based on + * their virtual addresses, while dynamic address translation is still + * disabled. Therefore a translation from virtual kernel image addresses to + * the corresponding physical addresses is required. + * + * After dynamic address translation is enabled and when the kernel needs to + * patch instructions such a translation is not required since the addresses + * are identical. + */ +static struct insn_ril *vaddress_to_insn(unsigned long vaddress) +{ +#ifdef __DECOMPRESSOR + return (struct insn_ril *)__kernel_pa(vaddress); +#else + return (struct insn_ril *)vaddress; +#endif +} + +static unsigned long insn_to_vaddress(struct insn_ril *insn) +{ +#ifdef __DECOMPRESSOR + return (unsigned long)__kernel_va(insn); +#else + return (unsigned long)insn; +#endif +} + +#define INSN_RIL_STRING_SIZE (sizeof(struct insn_ril) * 2 + 1) + +static void insn_ril_to_string(char *str, struct insn_ril *insn) +{ + u8 *ptr = (u8 *)insn; + int i; + + for (i = 0; i < sizeof(*insn); i++) + hex_byte_pack(&str[2 * i], ptr[i]); + str[2 * i] = 0; +} + +static void stack_protector_dump(struct insn_ril *old, struct insn_ril *new) +{ + char ostr[INSN_RIL_STRING_SIZE]; + char nstr[INSN_RIL_STRING_SIZE]; + + insn_ril_to_string(ostr, old); + insn_ril_to_string(nstr, new); + DEBUGP("%016lx: %s -> %s\n", insn_to_vaddress(old), ostr, nstr); +} + +static int stack_protector_verify(struct insn_ril *insn, unsigned long kernel_start) +{ + char istr[INSN_RIL_STRING_SIZE]; + unsigned long vaddress, offset; + + /* larl */ + if (insn->opc1 == 0xc0 && insn->opc2 == 0x0) + return 0; + /* lgrl */ + if (insn->opc1 == 0xc4 && insn->opc2 == 0x8) + return 0; + insn_ril_to_string(istr, insn); + vaddress = insn_to_vaddress(insn); + if (__is_defined(__DECOMPRESSOR)) { + offset = (unsigned long)insn - kernel_start + TEXT_OFFSET; + EMERGP("Unexpected instruction at %016lx/%016lx: %s\n", vaddress, offset, istr); + PANIC("Stackprotector error\n"); + } else { + EMERGP("Unexpected instruction at %016lx: %s\n", vaddress, istr); + } + return -EINVAL; +} + +int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start) +{ + unsigned long canary, *loc; + struct insn_ril *insn, new; + int rc; + + /* + * Convert LARL/LGRL 
instructions to LLILF so register R1 contains the + * address of the per-cpu / per-process stack canary: + * + * LARL/LGRL R1,__stack_chk_guard => LLILF R1,__lc_stack_canary + */ + canary = __LC_STACK_CANARY; + if (machine_has_relocated_lowcore()) + canary += LOWCORE_ALT_ADDRESS; + for (loc = start; loc < end; loc++) { + insn = vaddress_to_insn(*loc); + rc = stack_protector_verify(insn, kernel_start); + if (rc) + return rc; + new = *insn; + new.opc1 = 0xc0; + new.opc2 = 0xf; + new.imm = canary; + if (stack_protector_debug) + stack_protector_dump(insn, &new); + s390_kernel_write(insn, &new, sizeof(*insn)); + } + return 0; +} + +#ifdef __DECOMPRESSOR +void __stack_protector_apply_early(unsigned long kernel_start) +{ + unsigned long *start, *end; + + start = (unsigned long *)vmlinux.stack_prot_start; + end = (unsigned long *)vmlinux.stack_prot_end; + __stack_protector_apply(start, end, kernel_start); +} +#endif diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 9f59837d159e..3aae7f70e6ab 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -8,7 +8,7 @@ #include <linux/perf_event.h> #include <linux/stacktrace.h> #include <linux/uaccess.h> -#include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> @@ -106,8 +106,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo unsigned long ip, sp; bool first = true; - if (is_compat_task()) - return; if (!current->mm) return; ip = instruction_pointer(regs); @@ -151,7 +149,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo break; } if (!store_ip(consume_entry, cookie, entry, perf, ip)) - return; + break; first = false; } pagefault_enable(); diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index d40f0b983e74..5eae2e25997a 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -5,6 +5,8 @@ * Copyright IBM Corp. 2016 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> */ + +#include <linux/export.h> #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> @@ -251,7 +253,7 @@ static void fill_diag_mac(struct sthyi_sctns *sctns, sctns->mac.infmval1 |= MAC_CNT_VLD; } -/* Returns a pointer to the the next partition block. */ +/* Returns a pointer to the next partition block. */ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, bool this_lpar, void *diag224_buf, diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 5ec28028315b..795b6cca74c9 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -12,6 +12,7 @@ * platform. */ +#include <linux/cpufeature.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -38,6 +39,16 @@ #include "entry.h" +#define __SYSCALL(nr, sym) long __s390x_##sym(struct pt_regs *); +#include <asm/syscall_table.h> +#undef __SYSCALL + +#define __SYSCALL(nr, sym) [nr] = (__s390x_##sym), +const sys_call_ptr_t sys_call_table[__NR_syscalls] = { +#include <asm/syscall_table.h> +}; +#undef __SYSCALL + #ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. 
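Note: the sys_call_table definition added above uses the classic two-pass X-macro pattern. The generated asm/syscall_table.h expands __SYSCALL() once per syscall.tbl entry, and including it twice with different __SYSCALL definitions first declares every entry point and then builds the table with designated initializers; the shared table generator fills numbering gaps with sys_ni_syscall entries, so missing numbers still land on a valid handler. A minimal standalone sketch of the same pattern follows; a list macro stands in for the generated header (which the kernel #includes twice instead), and all names here are hypothetical:

#include <stdio.h>

/* Stand-in for the generated header; in the kernel the real
 * asm/syscall_table.h is #include'd twice instead of expanding
 * a list macro. */
#define DEMO_SYSCALLS \
	__SYSCALL(0, demo_restart) \
	__SYSCALL(1, demo_exit) \
	__SYSCALL(3, demo_read)

/* Pass 1: forward declarations for every entry point. */
#define __SYSCALL(nr, sym) long sym(void);
DEMO_SYSCALLS
#undef __SYSCALL

static long demo_ni_syscall(void) { return -38; /* -ENOSYS */ }

/* Pass 2: designated initializers build the dispatch table. The GNU
 * range initializer gives every unassigned slot the not-implemented
 * handler, mimicking what the generator's gap filling achieves; the
 * later per-entry initializers deliberately override it. */
#define __SYSCALL(nr, sym) [nr] = sym,
static long (*const demo_call_table[4])(void) = {
	[0 ... 3] = demo_ni_syscall,
	DEMO_SYSCALLS
};
#undef __SYSCALL

long demo_restart(void) { return 0; }
long demo_exit(void) { return 0; }
long demo_read(void) { return 42; }

int main(void)
{
	/* prints "42 -38": slot 3 dispatches, slot 2 is unimplemented */
	printf("%ld %ld\n", demo_call_table[3](), demo_call_table[2]());
	return 0;
}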
@@ -81,25 +92,35 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -static void do_syscall(struct pt_regs *regs) +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { unsigned long nr; + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); nr = regs->int_code & 0xffff; - if (!nr) { + if (likely(!nr)) { nr = regs->gprs[1] & 0xffff; regs->int_code &= ~0xffffUL; regs->int_code |= nr; } - regs->gprs[2] = nr; - if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { regs->psw.addr = current->restart_block.arch_data; current->restart_block.arch_data = 1; } nr = syscall_enter_from_user_mode_work(regs, nr); - /* * In the s390 ptrace ABI, both the syscall number and the return value * use gpr2. However, userspace puts the syscall number either in the @@ -107,37 +128,11 @@ static void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. */ - if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr >= NR_syscalls)) - goto out; - do { - regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); + if (likely(nr < NR_syscalls)) + regs->gprs[2] = sys_call_table[nr](regs); out: - syscall_exit_to_user_mode_work(regs); -} - -void noinstr __do_syscall(struct pt_regs *regs, int per_trap) -{ - add_random_kstack_offset(); - enter_from_user_mode(regs); - regs->psw = get_lowcore()->svc_old_psw; - regs->int_code = get_lowcore()->svc_int_code; - update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) - current->thread.last_break = regs->last_break; - - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - - if (per_trap) - set_thread_flag(TIF_PER_TRAP); - - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - exit_to_user_mode(); + syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile index c5d958a09ff4..d5fca0ca0890 100644 --- a/arch/s390/kernel/syscalls/Makefile +++ b/arch/s390/kernel/syscalls/Makefile @@ -1,48 +1,32 @@ # SPDX-License-Identifier: GPL-2.0 +kapi := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm -gen := arch/$(ARCH)/include/generated -kapi := $(gen)/asm -uapi := $(gen)/uapi/asm - -syscall := $(src)/syscall.tbl -systbl := $(src)/syscalltbl - -gen-y := $(kapi)/syscall_table.h -kapi-hdrs-y := $(kapi)/unistd_nr.h -uapi-hdrs-y := $(uapi)/unistd_32.h -uapi-hdrs-y += $(uapi)/unistd_64.h - -targets += $(addprefix ../../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y)) - -PHONY += kapi uapi - -kapi: $(gen-y) $(kapi-hdrs-y) -uapi: $(uapi-hdrs-y) - - -# Create output directory if not already present $(shell mkdir -p $(uapi) $(kapi)) -quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$@" < $< > $@ - -quiet_cmd_sysnr = SYSNR $@ - cmd_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< > $@ +syscall := $(src)/syscall.tbl +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := 
$(srctree)/scripts/syscalltbl.sh -quiet_cmd_syscalls = SYSTBL $@ - cmd_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< > $@ +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis common,$* $< $@ -syshdr_abi_unistd_32 := common,32 -$(uapi)/unistd_32.h: $(syscall) $(systbl) FORCE - $(call if_changed,syshdr) +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis common,$* $< $@ -syshdr_abi_unistd_64 := common,64 -$(uapi)/unistd_64.h: $(syscall) $(systbl) FORCE +$(uapi)/unistd_%.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) $(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE - $(call if_changed,syscalls) + $(call if_changed,systbl) + +uapisyshdr-y += unistd_64.h +kapisyshdr-y += syscall_table.h + +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) -sysnr_abi_unistd_nr := common,32,64 -$(kapi)/unistd_nr.h: $(syscall) $(systbl) FORCE - $(call if_changed,sysnr) +PHONY += all +all: $(uapisyshdr-y) $(kapisyshdr-y) + @: diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..417ed16b3c63 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -3,469 +3,397 @@ # System call table for s390 # # Format: +# <nr> <abi> <syscall> <entry> # -# <nr> <abi> <syscall> <entry-64bit> <compat-entry> -# -# where <abi> can be common, 64, or 32 +# <abi> is always common. -1 common exit sys_exit sys_exit -2 common fork sys_fork sys_fork -3 common read sys_read compat_sys_s390_read -4 common write sys_write compat_sys_s390_write -5 common open sys_open compat_sys_open -6 common close sys_close sys_close -7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat sys_creat -9 common link sys_link sys_link -10 common unlink sys_unlink sys_unlink -11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir sys_chdir -13 32 time - sys_time32 -14 common mknod sys_mknod sys_mknod -15 common chmod sys_chmod sys_chmod -16 32 lchown - sys_lchown16 -19 common lseek sys_lseek compat_sys_lseek -20 common getpid sys_getpid sys_getpid -21 common mount sys_mount sys_mount -22 common umount sys_oldumount sys_oldumount -23 32 setuid - sys_setuid16 -24 32 getuid - sys_getuid16 -25 32 stime - sys_stime32 -26 common ptrace sys_ptrace compat_sys_ptrace -27 common alarm sys_alarm sys_alarm -29 common pause sys_pause sys_pause -30 common utime sys_utime sys_utime32 -33 common access sys_access sys_access -34 common nice sys_nice sys_nice -36 common sync sys_sync sys_sync -37 common kill sys_kill sys_kill -38 common rename sys_rename sys_rename -39 common mkdir sys_mkdir sys_mkdir -40 common rmdir sys_rmdir sys_rmdir -41 common dup sys_dup sys_dup -42 common pipe sys_pipe sys_pipe -43 common times sys_times compat_sys_times -45 common brk sys_brk sys_brk -46 32 setgid - sys_setgid16 -47 32 getgid - sys_getgid16 -48 common signal sys_signal sys_signal -49 32 geteuid - sys_geteuid16 -50 32 getegid - sys_getegid16 -51 common acct sys_acct sys_acct -52 common umount2 sys_umount sys_umount -54 common ioctl sys_ioctl compat_sys_ioctl -55 common fcntl sys_fcntl compat_sys_fcntl -57 common setpgid sys_setpgid sys_setpgid -60 common umask sys_umask sys_umask -61 common chroot sys_chroot sys_chroot -62 common ustat sys_ustat compat_sys_ustat -63 common dup2 sys_dup2 sys_dup2 -64 common getppid sys_getppid sys_getppid -65 
common getpgrp sys_getpgrp sys_getpgrp -66 common setsid sys_setsid sys_setsid -67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - sys_setreuid16 -71 32 setregid - sys_setregid16 -72 common sigsuspend sys_sigsuspend sys_sigsuspend -73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname sys_sethostname -75 common setrlimit sys_setrlimit compat_sys_setrlimit -76 32 getrlimit - compat_sys_old_getrlimit -77 common getrusage sys_getrusage compat_sys_getrusage -78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday -79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - sys_getgroups16 -81 32 setgroups - sys_setgroups16 -83 common symlink sys_symlink sys_symlink -85 common readlink sys_readlink sys_readlink -86 common uselib sys_uselib sys_uselib -87 common swapon sys_swapon sys_swapon -88 common reboot sys_reboot sys_reboot -89 common readdir - compat_sys_old_readdir -90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap sys_munmap -92 common truncate sys_truncate compat_sys_truncate -93 common ftruncate sys_ftruncate compat_sys_ftruncate -94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - sys_fchown16 -96 common getpriority sys_getpriority sys_getpriority -97 common setpriority sys_setpriority sys_setpriority -99 common statfs sys_statfs compat_sys_statfs -100 common fstatfs sys_fstatfs compat_sys_fstatfs -101 32 ioperm - - -102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog sys_syslog -104 common setitimer sys_setitimer compat_sys_setitimer -105 common getitimer sys_getitimer compat_sys_getitimer -106 common stat sys_newstat compat_sys_newstat -107 common lstat sys_newlstat compat_sys_newlstat -108 common fstat sys_newfstat compat_sys_newfstat -110 common lookup_dcookie - - -111 common vhangup sys_vhangup sys_vhangup -112 common idle - - -114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff sys_swapoff -116 common sysinfo sys_sysinfo compat_sys_sysinfo -117 common ipc sys_s390_ipc compat_sys_s390_ipc -118 common fsync sys_fsync sys_fsync -119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone sys_clone -121 common setdomainname sys_setdomainname sys_setdomainname -122 common uname sys_newuname sys_newuname -124 common adjtimex sys_adjtimex sys_adjtimex_time32 -125 common mprotect sys_mprotect sys_mprotect -126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask -127 common create_module - - -128 common init_module sys_init_module sys_init_module -129 common delete_module sys_delete_module sys_delete_module -130 common get_kernel_syms - - -131 common quotactl sys_quotactl sys_quotactl -132 common getpgid sys_getpgid sys_getpgid -133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_ni_syscall sys_ni_syscall -135 common sysfs sys_sysfs sys_sysfs -136 common personality sys_s390_personality sys_s390_personality -137 common afs_syscall - - -138 32 setfsuid - sys_setfsuid16 -139 32 setfsgid - sys_setfsgid16 -140 32 _llseek - sys_llseek -141 common getdents sys_getdents compat_sys_getdents -142 32 _newselect - compat_sys_select -142 64 select sys_select - -143 common flock sys_flock sys_flock -144 common msync sys_msync sys_msync -145 common readv sys_readv sys_readv -146 common writev sys_writev sys_writev -147 common getsid sys_getsid sys_getsid -148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl - - -150 common mlock sys_mlock sys_mlock -151 
common munlock sys_munlock sys_munlock -152 common mlockall sys_mlockall sys_mlockall -153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam sys_sched_setparam -155 common sched_getparam sys_sched_getparam sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler -157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler -158 common sched_yield sys_sched_yield sys_sched_yield -159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max -160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 -162 common nanosleep sys_nanosleep sys_nanosleep_time32 -163 common mremap sys_mremap sys_mremap -164 32 setresuid - sys_setresuid16 -165 32 getresuid - sys_getresuid16 -167 common query_module - - -168 common poll sys_poll sys_poll -169 common nfsservctl - - -170 32 setresgid - sys_setresgid16 -171 32 getresgid - sys_getresgid16 -172 common prctl sys_prctl sys_prctl -173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn -174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction -175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask -176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 -178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo -179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend -180 common pread64 sys_pread64 compat_sys_s390_pread64 -181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - sys_chown16 -183 common getcwd sys_getcwd sys_getcwd -184 common capget sys_capget sys_capget -185 common capset sys_capset sys_capset -186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack -187 common sendfile sys_sendfile64 compat_sys_sendfile -188 common getpmsg - - -189 common putpmsg - - -190 common vfork sys_vfork sys_vfork -191 32 ugetrlimit - compat_sys_getrlimit -191 64 getrlimit sys_getrlimit - -192 32 mmap2 - compat_sys_s390_mmap2 -193 32 truncate64 - compat_sys_s390_truncate64 -194 32 ftruncate64 - compat_sys_s390_ftruncate64 -195 32 stat64 - compat_sys_s390_stat64 -196 32 lstat64 - compat_sys_s390_lstat64 -197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - sys_lchown -198 64 lchown sys_lchown - -199 32 getuid32 - sys_getuid -199 64 getuid sys_getuid - -200 32 getgid32 - sys_getgid -200 64 getgid sys_getgid - -201 32 geteuid32 - sys_geteuid -201 64 geteuid sys_geteuid - -202 32 getegid32 - sys_getegid -202 64 getegid sys_getegid - -203 32 setreuid32 - sys_setreuid -203 64 setreuid sys_setreuid - -204 32 setregid32 - sys_setregid -204 64 setregid sys_setregid - -205 32 getgroups32 - sys_getgroups -205 64 getgroups sys_getgroups - -206 32 setgroups32 - sys_setgroups -206 64 setgroups sys_setgroups - -207 32 fchown32 - sys_fchown -207 64 fchown sys_fchown - -208 32 setresuid32 - sys_setresuid -208 64 setresuid sys_setresuid - -209 32 getresuid32 - sys_getresuid -209 64 getresuid sys_getresuid - -210 32 setresgid32 - sys_setresgid -210 64 setresgid sys_setresgid - -211 32 getresgid32 - sys_getresgid -211 64 getresgid sys_getresgid - -212 32 chown32 - sys_chown -212 64 chown sys_chown - -213 32 setuid32 - sys_setuid -213 64 setuid sys_setuid - -214 32 setgid32 - sys_setgid -214 64 setgid sys_setgid - -215 32 setfsuid32 - sys_setfsuid -215 64 
setfsuid sys_setfsuid - -216 32 setfsgid32 - sys_setfsgid -216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root sys_pivot_root -218 common mincore sys_mincore sys_mincore -219 common madvise sys_madvise sys_madvise -220 common getdents64 sys_getdents64 sys_getdents64 -221 32 fcntl64 - compat_sys_fcntl64 -222 common readahead sys_readahead compat_sys_s390_readahead -223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr sys_setxattr -225 common lsetxattr sys_lsetxattr sys_lsetxattr -226 common fsetxattr sys_fsetxattr sys_fsetxattr -227 common getxattr sys_getxattr sys_getxattr -228 common lgetxattr sys_lgetxattr sys_lgetxattr -229 common fgetxattr sys_fgetxattr sys_fgetxattr -230 common listxattr sys_listxattr sys_listxattr -231 common llistxattr sys_llistxattr sys_llistxattr -232 common flistxattr sys_flistxattr sys_flistxattr -233 common removexattr sys_removexattr sys_removexattr -234 common lremovexattr sys_lremovexattr sys_lremovexattr -235 common fremovexattr sys_fremovexattr sys_fremovexattr -236 common gettid sys_gettid sys_gettid -237 common tkill sys_tkill sys_tkill -238 common futex sys_futex sys_futex_time32 -239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity -241 common tgkill sys_tgkill sys_tgkill -243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy sys_io_destroy -245 common io_getevents sys_io_getevents sys_io_getevents_time32 -246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel sys_io_cancel -248 common exit_group sys_exit_group sys_exit_group -249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl -251 common epoll_wait sys_epoll_wait sys_epoll_wait -252 common set_tid_address sys_set_tid_address sys_set_tid_address -253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 -254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime sys_timer_settime32 -256 common timer_gettime sys_timer_gettime sys_timer_gettime32 -257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun -258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime sys_clock_settime32 -260 common clock_gettime sys_clock_gettime sys_clock_gettime32 -261 common clock_getres sys_clock_getres sys_clock_getres_time32 -262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 -264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 -265 common statfs64 sys_statfs64 compat_sys_statfs64 -266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind sys_mbind -269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy -271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 -274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 -275 common mq_notify sys_mq_notify compat_sys_mq_notify -276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr -277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key sys_add_key -279 common request_key sys_request_key sys_request_key -280 common keyctl sys_keyctl 
compat_sys_keyctl -281 common waitid sys_waitid compat_sys_waitid -282 common ioprio_set sys_ioprio_set sys_ioprio_set -283 common ioprio_get sys_ioprio_get sys_ioprio_get -284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch -286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages sys_migrate_pages -288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat sys_mkdirat -290 common mknodat sys_mknodat sys_mknodat -291 common fchownat sys_fchownat sys_fchownat -292 common futimesat sys_futimesat sys_futimesat_time32 -293 32 fstatat64 - compat_sys_s390_fstatat64 -293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat sys_unlinkat -295 common renameat sys_renameat sys_renameat -296 common linkat sys_linkat sys_linkat -297 common symlinkat sys_symlinkat sys_symlinkat -298 common readlinkat sys_readlinkat sys_readlinkat -299 common fchmodat sys_fchmodat sys_fchmodat -300 common faccessat sys_faccessat sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 -302 common ppoll sys_ppoll compat_sys_ppoll_time32 -303 common unshare sys_unshare sys_unshare -304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice sys_splice -307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee sys_tee -309 common vmsplice sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages sys_move_pages -311 common getcpu sys_getcpu sys_getcpu -312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes sys_utimes_time32 -314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat sys_utimensat_time32 -316 common signalfd sys_signalfd compat_sys_signalfd -317 common timerfd - - -318 common eventfd sys_eventfd sys_eventfd -319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 -321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 -322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 -323 common eventfd2 sys_eventfd2 sys_eventfd2 -324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 sys_pipe2 -326 common dup3 sys_dup3 sys_dup3 -327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 -328 common preadv sys_preadv compat_sys_preadv -329 common pwritev sys_pwritev compat_sys_pwritev -330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open sys_perf_event_open -332 common fanotify_init sys_fanotify_init sys_fanotify_init -333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at -336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 -338 common syncfs sys_syncfs sys_syncfs -339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev -342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp sys_kcmp -344 common 
finit_module sys_finit_module sys_finit_module -345 common sched_setattr sys_sched_setattr sys_sched_setattr -346 common sched_getattr sys_sched_getattr sys_sched_getattr -347 common renameat2 sys_renameat2 sys_renameat2 -348 common seccomp sys_seccomp sys_seccomp -349 common getrandom sys_getrandom sys_getrandom -350 common memfd_create sys_memfd_create sys_memfd_create -351 common bpf sys_bpf sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read -354 common execveat sys_execveat compat_sys_execveat -355 common userfaultfd sys_userfaultfd sys_userfaultfd -356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 -358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg -359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair sys_socketpair -361 common bind sys_bind sys_bind -362 common connect sys_connect sys_connect -363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 sys_accept4 -365 common getsockopt sys_getsockopt sys_getsockopt -366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname sys_getsockname -368 common getpeername sys_getpeername sys_getpeername -369 common sendto sys_sendto sys_sendto -370 common sendmsg sys_sendmsg compat_sys_sendmsg -371 common recvfrom sys_recvfrom compat_sys_recvfrom -372 common recvmsg sys_recvmsg compat_sys_recvmsg -373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 sys_mlock2 -375 common copy_file_range sys_copy_file_range sys_copy_file_range -376 common preadv2 sys_preadv2 compat_sys_preadv2 -377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage -379 common statx sys_statx sys_statx -380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi -381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load -382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents -383 common rseq sys_rseq sys_rseq -384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect -385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc -386 common pkey_free sys_pkey_free sys_pkey_free +1 common exit sys_exit +2 common fork sys_fork +3 common read sys_read +4 common write sys_write +5 common open sys_open +6 common close sys_close +7 common restart_syscall sys_restart_syscall +8 common creat sys_creat +9 common link sys_link +10 common unlink sys_unlink +11 common execve sys_execve +12 common chdir sys_chdir +14 common mknod sys_mknod +15 common chmod sys_chmod +19 common lseek sys_lseek +20 common getpid sys_getpid +21 common mount sys_mount +22 common umount sys_oldumount +26 common ptrace sys_ptrace +27 common alarm sys_alarm +29 common pause sys_pause +30 common utime sys_utime +33 common access sys_access +34 common nice sys_nice +36 common sync sys_sync +37 common kill sys_kill +38 common rename sys_rename +39 common mkdir sys_mkdir +40 common rmdir sys_rmdir +41 common dup sys_dup +42 common pipe sys_pipe +43 common times sys_times +45 common brk sys_brk +48 common signal sys_signal +51 common acct sys_acct +52 common umount2 sys_umount +54 common ioctl sys_ioctl +55 common fcntl sys_fcntl +57 common setpgid sys_setpgid +60 common umask sys_umask +61 common chroot sys_chroot +62 common ustat sys_ustat +63 common dup2 sys_dup2 +64 common getppid sys_getppid +65 common getpgrp sys_getpgrp +66 common setsid sys_setsid +67 
common sigaction sys_sigaction +72 common sigsuspend sys_sigsuspend +73 common sigpending sys_sigpending +74 common sethostname sys_sethostname +75 common setrlimit sys_setrlimit +77 common getrusage sys_getrusage +78 common gettimeofday sys_gettimeofday +79 common settimeofday sys_settimeofday +83 common symlink sys_symlink +85 common readlink sys_readlink +86 common uselib sys_uselib +87 common swapon sys_swapon +88 common reboot sys_reboot +89 common readdir sys_ni_syscall +90 common mmap sys_old_mmap +91 common munmap sys_munmap +92 common truncate sys_truncate +93 common ftruncate sys_ftruncate +94 common fchmod sys_fchmod +96 common getpriority sys_getpriority +97 common setpriority sys_setpriority +99 common statfs sys_statfs +100 common fstatfs sys_fstatfs +102 common socketcall sys_socketcall +103 common syslog sys_syslog +104 common setitimer sys_setitimer +105 common getitimer sys_getitimer +106 common stat sys_newstat +107 common lstat sys_newlstat +108 common fstat sys_newfstat +110 common lookup_dcookie sys_ni_syscall +111 common vhangup sys_vhangup +112 common idle sys_ni_syscall +114 common wait4 sys_wait4 +115 common swapoff sys_swapoff +116 common sysinfo sys_sysinfo +117 common ipc sys_s390_ipc +118 common fsync sys_fsync +119 common sigreturn sys_sigreturn +120 common clone sys_clone +121 common setdomainname sys_setdomainname +122 common uname sys_newuname +124 common adjtimex sys_adjtimex +125 common mprotect sys_mprotect +126 common sigprocmask sys_sigprocmask +127 common create_module sys_ni_syscall +128 common init_module sys_init_module +129 common delete_module sys_delete_module +130 common get_kernel_syms sys_ni_syscall +131 common quotactl sys_quotactl +132 common getpgid sys_getpgid +133 common fchdir sys_fchdir +134 common bdflush sys_ni_syscall +135 common sysfs sys_sysfs +136 common personality sys_s390_personality +137 common afs_syscall sys_ni_syscall +141 common getdents sys_getdents +142 common select sys_select +143 common flock sys_flock +144 common msync sys_msync +145 common readv sys_readv +146 common writev sys_writev +147 common getsid sys_getsid +148 common fdatasync sys_fdatasync +149 common _sysctl sys_ni_syscall +150 common mlock sys_mlock +151 common munlock sys_munlock +152 common mlockall sys_mlockall +153 common munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep +163 common mremap sys_mremap +167 common query_module sys_ni_syscall +168 common poll sys_poll +169 common nfsservctl sys_ni_syscall +172 common prctl sys_prctl +173 common rt_sigreturn sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend +180 common pread64 sys_pread64 +181 common pwrite64 sys_pwrite64 +183 common getcwd sys_getcwd +184 common capget sys_capget +185 common capset sys_capset +186 common sigaltstack sys_sigaltstack +187 common sendfile sys_sendfile64 +188 common getpmsg sys_ni_syscall +189 common putpmsg 
sys_ni_syscall +190 common vfork sys_vfork +191 common getrlimit sys_getrlimit +198 common lchown sys_lchown +199 common getuid sys_getuid +200 common getgid sys_getgid +201 common geteuid sys_geteuid +202 common getegid sys_getegid +203 common setreuid sys_setreuid +204 common setregid sys_setregid +205 common getgroups sys_getgroups +206 common setgroups sys_setgroups +207 common fchown sys_fchown +208 common setresuid sys_setresuid +209 common getresuid sys_getresuid +210 common setresgid sys_setresgid +211 common getresgid sys_getresgid +212 common chown sys_chown +213 common setuid sys_setuid +214 common setgid sys_setgid +215 common setfsuid sys_setfsuid +216 common setfsgid sys_setfsgid +217 common pivot_root sys_pivot_root +218 common mincore sys_mincore +219 common madvise sys_madvise +220 common getdents64 sys_getdents64 +222 common readahead sys_readahead +224 common setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr +231 common llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr +233 common removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr +236 common gettid sys_gettid +237 common tkill sys_tkill +238 common futex sys_futex +239 common sched_setaffinity sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity +241 common tgkill sys_tgkill +243 common io_setup sys_io_setup +244 common io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents +246 common io_submit sys_io_submit +247 common io_cancel sys_io_cancel +248 common exit_group sys_exit_group +249 common epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 +254 common timer_create sys_timer_create +255 common timer_settime sys_timer_settime +256 common timer_gettime sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime +260 common clock_gettime sys_clock_gettime +261 common clock_getres sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep +265 common statfs64 sys_statfs64 +266 common fstatfs64 sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages +268 common mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy +271 common mq_open sys_mq_open +272 common mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive +275 common mq_notify sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr +277 common kexec_load sys_kexec_load +278 common add_key sys_add_key +279 common request_key sys_request_key +280 common keyctl sys_keyctl +281 common waitid sys_waitid +282 common ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages +288 common openat sys_openat +289 common mkdirat sys_mkdirat +290 common mknodat sys_mknodat +291 common fchownat sys_fchownat +292 common futimesat sys_futimesat +293 common newfstatat sys_newfstatat +294 common unlinkat sys_unlinkat +295 common renameat sys_renameat 
+296 common linkat sys_linkat +297 common symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat +300 common faccessat sys_faccessat +301 common pselect6 sys_pselect6 +302 common ppoll sys_ppoll +303 common unshare sys_unshare +304 common set_robust_list sys_set_robust_list +305 common get_robust_list sys_get_robust_list +306 common splice sys_splice +307 common sync_file_range sys_sync_file_range +308 common tee sys_tee +309 common vmsplice sys_vmsplice +310 common move_pages sys_move_pages +311 common getcpu sys_getcpu +312 common epoll_pwait sys_epoll_pwait +313 common utimes sys_utimes +314 common fallocate sys_fallocate +315 common utimensat sys_utimensat +316 common signalfd sys_signalfd +317 common timerfd sys_ni_syscall +318 common eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 +323 common eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 +326 common dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv +329 common pwritev sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open +332 common fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark +334 common prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime +338 common syncfs sys_syncfs +339 common setns sys_setns +340 common process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp +344 common finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 +348 common seccomp sys_seccomp +349 common getrandom sys_getrandom +350 common memfd_create sys_memfd_create +351 common bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read +354 common execveat sys_execveat +355 common userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg +358 common sendmmsg sys_sendmmsg +359 common socket sys_socket +360 common socketpair sys_socketpair +361 common bind sys_bind +362 common connect sys_connect +363 common listen sys_listen +364 common accept4 sys_accept4 +365 common getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt +367 common getsockname sys_getsockname +368 common getpeername sys_getpeername +369 common sendto sys_sendto +370 common sendmsg sys_sendmsg +371 common recvfrom sys_recvfrom +372 common recvmsg sys_recvmsg +373 common shutdown sys_shutdown +374 common mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range +376 common preadv2 sys_preadv2 +377 common pwritev2 sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx +380 common s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents +383 common rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free # room for arch specific syscalls -392 64 semtimedop sys_semtimedop - -393 common semget sys_semget sys_semget -394 
common semctl sys_semctl compat_sys_semctl -395 common shmget sys_shmget sys_shmget -396 common shmctl sys_shmctl compat_sys_shmctl -397 common shmat sys_shmat compat_sys_shmat -398 common shmdt sys_shmdt sys_shmdt -399 common msgget sys_msgget sys_msgget -400 common msgsnd sys_msgsnd compat_sys_msgsnd -401 common msgrcv sys_msgrcv compat_sys_msgrcv -402 common msgctl sys_msgctl compat_sys_msgctl -403 32 clock_gettime64 - sys_clock_gettime -404 32 clock_settime64 - sys_clock_settime -405 32 clock_adjtime64 - sys_clock_adjtime -406 32 clock_getres_time64 - sys_clock_getres -407 32 clock_nanosleep_time64 - sys_clock_nanosleep -408 32 timer_gettime64 - sys_timer_gettime -409 32 timer_settime64 - sys_timer_settime -410 32 timerfd_gettime64 - sys_timerfd_gettime -411 32 timerfd_settime64 - sys_timerfd_settime -412 32 utimensat_time64 - sys_utimensat -413 32 pselect6_time64 - compat_sys_pselect6_time64 -414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 -417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 -418 32 mq_timedsend_time64 - sys_mq_timedsend -419 32 mq_timedreceive_time64 - sys_mq_timedreceive -420 32 semtimedop_time64 - sys_semtimedop -421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 -422 32 futex_time64 - sys_futex -423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval -424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal -425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup -426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter -427 common io_uring_register sys_io_uring_register sys_io_uring_register -428 common open_tree sys_open_tree sys_open_tree -429 common move_mount sys_move_mount sys_move_mount -430 common fsopen sys_fsopen sys_fsopen -431 common fsconfig sys_fsconfig sys_fsconfig -432 common fsmount sys_fsmount sys_fsmount -433 common fspick sys_fspick sys_fspick -434 common pidfd_open sys_pidfd_open sys_pidfd_open -435 common clone3 sys_clone3 sys_clone3 -436 common close_range sys_close_range sys_close_range -437 common openat2 sys_openat2 sys_openat2 -438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd -439 common faccessat2 sys_faccessat2 sys_faccessat2 -440 common process_madvise sys_process_madvise sys_process_madvise -441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd -444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset -445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule -446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self -447 common memfd_secret sys_memfd_secret sys_memfd_secret -448 common process_mrelease sys_process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node -451 common cachestat sys_cachestat sys_cachestat -452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 -453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack -454 common futex_wake sys_futex_wake sys_futex_wake -455 common futex_wait sys_futex_wait sys_futex_wait -456 common futex_requeue sys_futex_requeue sys_futex_requeue -457 common statmount sys_statmount sys_statmount -458 common listmount sys_listmount sys_listmount -459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr -460 common 
lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr -461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules -462 common mseal sys_mseal sys_mseal -463 common setxattrat sys_setxattrat sys_setxattrat -464 common getxattrat sys_getxattrat sys_getxattrat -465 common listxattrat sys_listxattrat sys_listxattrat -466 common removexattrat sys_removexattrat sys_removexattrat +392 common semtimedop sys_semtimedop +393 common semget sys_semget +394 common semctl sys_semctl +395 common shmget sys_shmget +396 common shmctl sys_shmctl +397 common shmat sys_shmat +398 common shmdt sys_shmdt +399 common msgget sys_msgget +400 common msgsnd sys_msgsnd +401 common msgrcv sys_msgrcv +402 common msgctl sys_msgctl +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/s390/kernel/syscalls/syscalltbl b/arch/s390/kernel/syscalls/syscalltbl deleted file mode 100755 index fbac1732f874..000000000000 --- a/arch/s390/kernel/syscalls/syscalltbl +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# Generate system call table and header files -# -# Copyright IBM Corp. 2018 -# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - -# -# File path to the system call table definition. -# You can set the path with the -i option. If omitted, -# system call table definitions are read from standard input. 
-# -SYSCALL_TBL="" - - -create_syscall_table_entries() -{ - local nr abi name entry64 entry32 _ignore - local temp=$(mktemp ${TMPDIR:-/tmp}/syscalltbl-common.XXXXXXXXX) - - ( - # - # Initialize with 0 to create an NI_SYSCALL for 0 - # - local prev_nr=0 prev_32=sys_ni_syscall prev_64=sys_ni_syscall - while read nr abi name entry64 entry32 _ignore; do - test x$entry32 = x- && entry32=sys_ni_syscall - test x$entry64 = x- && entry64=sys_ni_syscall - - if test $prev_nr -eq $nr; then - # - # Same syscall but different ABI, just update - # the respective entry point - # - case $abi in - 32) - prev_32=$entry32 - ;; - 64) - prev_64=$entry64 - ;; - esac - continue; - else - printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32 - fi - - prev_nr=$nr - prev_64=$entry64 - prev_32=$entry32 - done - printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32 - ) >> $temp - - # - # Check for duplicate syscall numbers - # - if ! cat $temp |cut -f1 |uniq -d 2>&1; then - echo "Error: generated system call table contains duplicate entries: $temp" >&2 - exit 1 - fi - - # - # Generate syscall table - # - prev_nr=0 - while read nr entry64 entry32; do - while test $prev_nr -lt $((nr - 1)); do - printf "NI_SYSCALL\n" - prev_nr=$((prev_nr + 1)) - done - if test x$entry64 = xsys_ni_syscall && - test x$entry32 = xsys_ni_syscall; then - printf "NI_SYSCALL\n" - else - printf "SYSCALL(%s,%s)\n" $entry64 $entry32 - fi - prev_nr=$nr - done < $temp - rm $temp -} - -generate_syscall_table() -{ - cat <<-EoHEADER - /* SPDX-License-Identifier: GPL-2.0 */ - /* - * Definitions for sys_call_table, each line represents an - * entry in the table in the form - * SYSCALL(64 bit syscall, 31 bit emulated syscall) - * - * This file is meant to be included from entry.S. - */ - - #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall) - -EoHEADER - grep -Ev '^(#|[[:blank:]]*$)' $SYSCALL_TBL \ - |sort -k1 -n \ - |create_syscall_table_entries -} - -create_header_defines() -{ - local nr abi name _ignore - - while read nr abi name _ignore; do - printf "#define __NR_%s %d\n" $name $nr - done -} - -normalize_fileguard() -{ - local fileguard="$1" - - echo "$1" |tr '[[:lower:]]' '[[:upper:]]' \ - |sed -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g' -} - -generate_syscall_header() -{ - local abis=$(echo "($1)" | tr ',' '|') - local filename="$2" - local fileguard suffix - - if test "$filename"; then - fileguard=$(normalize_fileguard "__UAPI_ASM_S390_$2") - else - case "$abis" in - *64*) suffix=64 ;; - *32*) suffix=32 ;; - esac - fileguard=$(normalize_fileguard "__UAPI_ASM_S390_SYSCALLS_$suffix") - fi - - cat <<-EoHEADER - /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - #ifndef ${fileguard} - #define ${fileguard} - -EoHEADER - - grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \ - |sort -k1 -n \ - |create_header_defines - - cat <<-EoFOOTER - - #endif /* ${fileguard} */ -EoFOOTER -} - -__max_syscall_nr() -{ - local abis=$(echo "($1)" | tr ',' '|') - - grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \ - |sed -ne 's/^\([[:digit:]]*\)[[:space:]].*/\1/p' \ - |sort -n \ - |tail -1 -} - - -generate_syscall_nr() -{ - local abis="$1" - local max_syscall_nr num_syscalls - - max_syscall_nr=$(__max_syscall_nr "$abis") - num_syscalls=$((max_syscall_nr + 1)) - - cat <<-EoHEADER - /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - #ifndef __ASM_S390_SYSCALLS_NR - #define __ASM_S390_SYSCALLS_NR - - #define NR_syscalls ${num_syscalls} - - #endif /* __ASM_S390_SYSCALLS_NR */ -EoHEADER -} - - -# -# Parse command line arguments -# 
-do_syscall_header="" -do_syscall_table="" -do_syscall_nr="" -output_file="" -abi_list="common,64" -filename="" -while getopts ":HNSXi:a:f:" arg; do - case $arg in - a) - abi_list="$OPTARG" - ;; - i) - SYSCALL_TBL="$OPTARG" - ;; - f) - filename=${OPTARG##*/} - ;; - H) - do_syscall_header=1 - ;; - N) - do_syscall_nr=1 - ;; - S) - do_syscall_table=1 - ;; - X) - set -x - ;; - :) - echo "Missing argument for -$OPTARG" >&2 - exit 1 - ;; - \?) - echo "Invalid option specified" >&2 - exit 1 - ;; - esac -done - -test "$do_syscall_header" && generate_syscall_header "$abi_list" "$filename" -test "$do_syscall_table" && generate_syscall_table -test "$do_syscall_nr" && generate_syscall_nr "$abi_list" - -exit 0 diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 88055f58fbda..33ca3e47a0e6 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -5,6 +5,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -15,54 +16,17 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> #include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) -{ - int r0 = (fc << 28) | sel1; - int rc = 0; - - asm volatile( - " lr 0,%[r0]\n" - " lr 1,%[r1]\n" - " stsi 0(%[sysinfo])\n" - "0: jz 2f\n" - "1: lhi %[rc],%[retval]\n" - "2: lr %[r0],0\n" - EX_TABLE(0b, 1b) - : [r0] "+d" (r0), [rc] "+d" (rc) - : [r1] "d" (sel2), - [sysinfo] "a" (sysinfo), - [retval] "K" (-EOPNOTSUPP) - : "cc", "0", "1", "memory"); - *lvl = ((unsigned int) r0) >> 28; - return rc; -} - -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - int lvl, rc; - - rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); - if (rc) - return rc; - return fc ? 
0 : lvl; -} -EXPORT_SYMBOL(stsi); - #ifdef CONFIG_PROC_FS static bool convert_ext_name(unsigned char encoding, char *name, size_t len) @@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -415,7 +379,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); - if (MACHINE_IS_VM) + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -559,10 +523,10 @@ static __init int stsi_init_debugfs(void) sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); } - if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; - sprintf(link_to, "15_1_%d", topology_mnest_limit()); + snprintf(link_to, sizeof(link_to), "15_1_%d", topology_mnest_limit()); debugfs_create_symlink("topology", stsi_root, link_to); } return 0; diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index c0a70efa2426..26f2981aa09e 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -18,8 +18,7 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_AMODE31_r14 - larl %r1,0f - ex 0,0(%r1) + exrl 0,0f j . 0: br %r14 .endm diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 34a65c141ea0..bd0df61d1907 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -12,8 +12,7 @@ * Copyright (C) 1991, 1992, 1995 Linus Torvalds */ -#define KMSG_COMPONENT "time" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "time: " fmt #include <linux/kernel_stat.h> #include <linux/errno.h> @@ -54,10 +53,10 @@ #include <asm/cio.h> #include "entry.h" -union tod_clock tod_clock_base __section(".data"); +union tod_clock __bootdata_preserved(tod_clock_base); EXPORT_SYMBOL_GPL(tod_clock_base); -u64 clock_comparator_max = -1ULL; +u64 __bootdata_preserved(clock_comparator_max); EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -69,8 +68,6 @@ unsigned char ptff_function_mask[16]; static unsigned long lpar_offset; static unsigned long initial_leap_seconds; -static unsigned long tod_steering_end; -static long tod_steering_delta; /* * Get time offsets with PTFF @@ -79,12 +76,8 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; - /* Initialize TOD steering parameters */ - tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod; if (!test_facility(28)) return; @@ -228,21 +221,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, static u64 read_tod_clock(struct clocksource *cs) { - unsigned long now, adj; - - preempt_disable(); /* protect from changes to steering parameters */ - now = get_tod_clock(); - adj = tod_steering_end - now; - if (unlikely((s64) adj > 0)) - /* - * manually steer by 1 cycle every 2^16 cycles. This - * corresponds to shifting the tod delta by 15. 1s is - * therefore steered in ~9h. The adjust will decrease - * over time, until it finally reaches 0. - */ - now += (tod_steering_delta < 0) ? 
(adj >> 15) : -(adj >> 15); - preempt_enable(); - return now; + return get_tod_clock_monotonic(); } static struct clocksource clocksource_tod = { @@ -371,29 +350,11 @@ static inline int check_sync_clock(void) */ static void clock_sync_global(long delta) { - unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; - /* Adjust TOD steering parameters. */ - now = get_tod_clock(); - adj = tod_steering_end - now; - if (unlikely((s64) adj >= 0)) - /* Calculate how much of the old adjustment is left. */ - tod_steering_delta = (tod_steering_delta < 0) ? - -(adj >> 15) : (adj >> 15); - tod_steering_delta += delta; - if ((abs(tod_steering_delta) >> 48) != 0) - panic("TOD clock sync offset %li is too large to drift\n", - tod_steering_delta); - tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } - + vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) lpar_offset = qto.tod_epoch_difference; @@ -435,7 +396,7 @@ struct clock_sync_data { /* * Server Time Protocol (STP) code. */ -static bool stp_online; +static bool stp_online = true; static struct stp_sstpi stp_info; static void *stp_page; @@ -461,7 +422,6 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warn("The real or virtual hardware system does not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; stp_online = false; @@ -585,7 +545,7 @@ static int stp_sync_clock(void *data) atomic_dec(&sync->cpus); /* Wait for in_sync to be set. */ while (READ_ONCE(sync->in_sync) == 0) - __udelay(1); + ; } if (sync->in_sync != 1) /* Didn't work. Clear per-cpu in sync bit again. */ @@ -596,81 +556,6 @@ static int stp_sync_clock(void *data) return 0; } -static int stp_clear_leap(void) -{ - struct __kernel_timex txc; - int ret; - - memset(&txc, 0, sizeof(txc)); - - ret = do_adjtimex(&txc); - if (ret < 0) - return ret; - - txc.modes = ADJ_STATUS; - txc.status &= ~(STA_INS|STA_DEL); - return do_adjtimex(&txc); -} - -static void stp_check_leap(void) -{ - struct stp_stzi stzi; - struct stp_lsoib *lsoib = &stzi.lsoib; - struct __kernel_timex txc; - int64_t timediff; - int leapdiff, ret; - - if (!stp_info.lu || !check_sync_clock()) { - /* - * Either a scheduled leap second was removed by the operator, - * or STP is out of sync. In both cases, clear the leap second - * kernel flags. 
- */ - if (stp_clear_leap() < 0) - pr_err("failed to clear leap second flags\n"); - return; - } - - if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) { - pr_err("stzi failed\n"); - return; - } - - timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC; - leapdiff = lsoib->nlso - lsoib->also; - - if (leapdiff != 1 && leapdiff != -1) { - pr_err("Cannot schedule %d leap seconds\n", leapdiff); - return; - } - - if (timediff < 0) { - if (stp_clear_leap() < 0) - pr_err("failed to clear leap second flags\n"); - } else if (timediff < 7200) { - memset(&txc, 0, sizeof(txc)); - ret = do_adjtimex(&txc); - if (ret < 0) - return; - - txc.modes = ADJ_STATUS; - if (leapdiff > 0) - txc.status |= STA_INS; - else - txc.status |= STA_DEL; - ret = do_adjtimex(&txc); - if (ret < 0) - pr_err("failed to set leap second flags\n"); - /* arm Timer to clear leap second flags */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); - } else { - /* The day the leap second is scheduled for hasn't been reached. Retry - * in one hour. - */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); - } -} - /* * STP work. Check for the STP state and take over the clock * synchronization if the STP clock source is usable. @@ -685,7 +570,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); - del_timer_sync(&stp_timer); + timer_delete_sync(&stp_timer); goto out_unlock; } @@ -712,8 +597,6 @@ static void stp_work_fn(struct work_struct *work) * Retry after a second. */ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); - else if (stp_info.lu) - stp_check_leap(); out_unlock: mutex_unlock(&stp_mutex); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4f9c301a705b..1913a5566ac2 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -3,9 +3,9 @@ * Copyright IBM Corp. 2007, 2011 */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/uaccess.h> @@ -240,7 +240,7 @@ int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -315,13 +315,13 @@ static int __arch_update_cpu_topology(void) hd_status = 0; rc = 0; mutex_lock(&smp_cpu_state_mutex); - if (MACHINE_HAS_TOPOLOGY) { + if (cpu_has_topology()) { rc = 1; store_topology(info); tl_to_masks(info); } update_cpu_masks(); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) topology_update_polarization_simple(); if (cpu_management == 1) hd_status = hd_enable_hiperdispatch(); @@ -371,12 +371,12 @@ static void set_topology_timer(void) if (atomic_add_unless(&topology_poll, -1, 0)) mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); + mod_timer(&topology_timer, jiffies + secs_to_jiffies(60)); } void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. 
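A note on the set_topology_timer() hunk above: secs_to_jiffies(60) computes the same expiry as the old msecs_to_jiffies(60 * MSEC_PER_SEC), without the detour through milliseconds. A minimal sketch of the equivalence, illustrative only and not part of the patch, reusing this file's topology_timer:

	#include <linux/jiffies.h>
	#include <linux/timer.h>

	static void arm_topology_poll_timer(void)
	{
		/* Old form: convert 60 s to msec, then msec to jiffies. */
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
		/* New form: same expiry, direct seconds-to-jiffies conversion. */
		mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
	}
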
@@ -500,7 +500,7 @@ int topology_cpu_init(struct cpu *cpu) int rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); - if (rc || !MACHINE_HAS_TOPOLOGY) + if (rc || !cpu_has_topology()) return rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); if (rc) @@ -508,33 +508,27 @@ int topology_cpu_init(struct cpu *cpu) return rc; } -static const struct cpumask *cpu_thread_mask(int cpu) -{ - return &cpu_topology[cpu].thread_mask; -} - - const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_topology[cpu].core_mask; } -static const struct cpumask *cpu_book_mask(int cpu) +static const struct cpumask *tl_book_mask(struct sched_domain_topology_level *tl, int cpu) { return &cpu_topology[cpu].book_mask; } -static const struct cpumask *cpu_drawer_mask(int cpu) +static const struct cpumask *tl_drawer_mask(struct sched_domain_topology_level *tl, int cpu) { return &cpu_topology[cpu].drawer_mask; } static struct sched_domain_topology_level s390_topology[] = { - { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, - { cpu_book_mask, SD_INIT_NAME(BOOK) }, - { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, - { cpu_cpu_mask, SD_INIT_NAME(PKG) }, + SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT), + SDTL_INIT(tl_mc_mask, cpu_core_flags, MC), + SDTL_INIT(tl_book_mask, NULL, BOOK), + SDTL_INIT(tl_drawer_mask, NULL, DRAWER), + SDTL_INIT(tl_pkg_mask, NULL, PKG), { NULL, }, }; @@ -548,33 +542,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = memblock_alloc(sizeof(*mask->next), 8); - if (!mask->next) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*mask->next), 8); + mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8); mask = mask->next; } } +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + void __init topology_init_early(void) { struct sysinfo_15_1_x *info; set_sched_topology(s390_topology); if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) topology_mode = TOPOLOGY_MODE_HW; else topology_mode = TOPOLOGY_MODE_SINGLE; } - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) goto out; - tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!tl_info) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); + tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); + cpu_management = detect_polarization(info->tle); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", info->mag[0], info->mag[1], info->mag[2], info->mag[3], info->mag[4], info->mag[5], info->mnest); @@ -591,7 +590,7 @@ static inline int topology_get_mode(int enabled) { if (!enabled) return TOPOLOGY_MODE_SINGLE; - return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; + return cpu_has_topology() ? 
TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } static inline int topology_is_enabled(void) @@ -662,7 +661,7 @@ static int polarization_ctl_handler(const struct ctl_table *ctl, int write, return set_polarization(polarization); } -static struct ctl_table topology_ctl_table[] = { +static const struct ctl_table topology_ctl_table[] = { { .procname = "topology", .mode = 0644, @@ -681,7 +680,7 @@ static int __init topology_init(void) int rc = 0; timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) set_topology_timer(); else topology_update_polarization_simple(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -3,18 +3,13 @@ * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include "asm/irqflags.h" -#include "asm/ptrace.h" +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/randomize_kstack.h> @@ -29,6 +24,8 @@ #include <linux/entry-common.h> #include <linux/kmsan.h> #include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> #include <asm/vtime.h> #include <asm/fpu.h> #include <asm/fault.h> @@ -42,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) (address - (regs->int_code >> 16)); + return (void __user *)(address - (regs->int_code >> 16)); } #ifdef CONFIG_GENERIC_BUG @@ -57,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (user_mode(regs)) { force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); - } else { + } else { if (!fixup_exception(regs)) die(regs, str); - } + } } static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) return; do_report_trap(regs, si_signo, si_code, str); } @@ -78,8 +74,7 @@ void do_per_trap(struct pt_regs *regs) return; if (!current->ptrace) return; - force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -98,36 +93,25 @@ static void name(struct pt_regs *regs) \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") -DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP 
significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -153,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs) static void illegal_op(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location; int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (opcode == S390_BREAKPOINT_U16) { if (current->ptrace) force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES - } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + } else if (opcode == UPROBE_SWBP_INSN) { is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; + } } /* - * We got either an illegal op in kernel mode, or user space trapped + * This is either an illegal op in kernel mode, or user space trapped * on a uprobes illegal instruction. See if kprobes or uprobes picks * it up. If not, SIGILL. 
*/ if (is_uprobe_insn || !user_mode(regs)) { - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } if (signal) @@ -190,18 +173,10 @@ static void illegal_op(struct pt_regs *regs) } NOKPROBE_SYMBOL(illegal_op); -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); - static void vector_exception(struct pt_regs *regs) { int si_code, vic; - if (!cpu_has_vx()) { - do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); - return; - } - /* get vector interrupt code from fpc */ save_user_fpu_regs(); vic = (current->thread.ufpu.fpc & 0xf00) >> 8; @@ -249,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs) { if (user_mode(regs)) return; - switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: fixup_exception(regs); @@ -262,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs) } } -void kernel_stack_overflow(struct pt_regs *regs) +void kernel_stack_invalid(struct pt_regs *regs) { /* * Normally regs are unpoisoned by the generic entry code, but @@ -270,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs) */ kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); } -NOKPROBE_SYMBOL(kernel_stack_overflow); +NOKPROBE_SYMBOL(kernel_stack_invalid); static void __init test_monitor_call(void) { @@ -283,12 +257,12 @@ static void __init test_monitor_call(void) if (!IS_ENABLED(CONFIG_BUG)) return; - asm volatile( + asm_inline volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } @@ -323,7 +297,6 @@ void noinstr __do_pgm_check(struct pt_regs *regs) teid.val = lc->trans_exc_code; regs->int_code = lc->pgm_int_code; regs->int_parm_long = teid.val; - /* * In case of a guest fault, short-circuit the fault handler and return. 
* This way the sie64a() function will return 0; fault address and @@ -336,23 +309,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.gmap_int_code = regs->int_code & 0xffff; return; } - state = irqentry_enter(regs); - if (user_mode(regs)) { update_timer_sys(); - if (!static_branch_likely(&cpu_has_bear)) { + if (!cpu_has_bear()) { if (regs->last_break < 4096) regs->last_break = 1; } current->thread.last_break = regs->last_break; } - if (lc->pgm_code & 0x0200) { /* transaction abort */ current->thread.trap_tdb = lc->pgm_tdb; } - if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = &current->thread.per_event; @@ -368,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) goto out; } } - if (!irqs_disabled_flags(regs->psw.mask)) trace_hardirqs_on(); __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); - trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index cd44be2b6ce8..0f88caca4eaf 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/export.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 5b0633ea8d93..c624f3361e43 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -8,7 +8,6 @@ #include <linux/uaccess.h> #include <linux/uprobes.h> -#include <linux/compat.h> #include <linux/kdebug.h> #include <linux/sched/task_stack.h> @@ -29,7 +28,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) return -EINVAL; - if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; clear_thread_flag(TIF_PER_TRAP); auprobe->saved_per = psw_bits(regs->psw).per; @@ -161,11 +160,6 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, /* Instruction Emulation */ -static void adjust_psw_addr(psw_t *psw, unsigned long len) -{ - psw->addr = __rewind_psw(*psw, -len); -} - #define EMU_ILLEGAL_OP 1 #define EMU_SPECIFICATION 2 #define EMU_ADDRESSING 3 @@ -353,7 +347,7 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) } break; } - adjust_psw_addr(&regs->psw, ilen); + regs->psw.addr = __forward_psw(regs->psw, ilen); switch (rc) { case EMU_ILLEGAL_OP: regs->int_code = ilen << 16 | 0x0001; @@ -373,8 +367,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) || - ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) && - !is_compat_task())) { + (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)) { regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); return true; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad..ed46950be86f 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -4,9 +4,9 @@ * * Copyright IBM Corp.
2019, 2024 */ -#define KMSG_COMPONENT "prot_virt" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "prot_virt: " fmt +#include <linux/export.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/sizes.h> @@ -15,23 +15,11 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/pagewalk.h> +#include <linux/backing-dev.h> #include <asm/facility.h> #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -148,17 +136,18 @@ int uv_destroy_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; folio_get(folio); rc = uv_destroy(folio_to_phys(folio)); if (!rc) - clear_bit(PG_arch_1, &folio->flags); + clear_bit(PG_arch_1, &folio->flags.f); folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +164,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,25 +176,27 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; folio_get(folio); rc = uv_convert_from_secure(folio_to_phys(folio)); if (!rc) - clear_bit(PG_arch_1, &folio->flags); + clear_bit(PG_arch_1, &folio->flags.f); folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -216,6 +207,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. 
+ */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -237,16 +261,34 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * __make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()), and the folio must be + * locked. + */ +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; - set_bit(PG_arch_1, &folio->flags); + set_bit(PG_arch_1, &folio->flags.f); /* * If the UVC does not succeed or fail immediately, we don't want to * loop for long, or we might get stall notifications. @@ -268,262 +310,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) { - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} + int rc; -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. 
Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; } -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; + int rc, tried_splits; -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. 
- */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { folio_unlock(folio); + return rc; } /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: + /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. 
*/ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; + if (unlikely(!inode)) + break; - return gmap_make_secure(gmap, gaddr, &uvcb); + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; } -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) { struct vm_area_struct *vma; struct folio_walk fw; - unsigned long uaddr; struct folio *folio; int rc; - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); + folio_get(folio); /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. 
*/ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + return rc; } -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will - * prevent gmap_make_secure from touching the folio concurrently. Having 2 - * parallel arch_make_folio_accessible is fine, as the UV calls will become a - * no-op if the folio is already exported. + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. + * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. */ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -535,18 +482,18 @@ int arch_make_folio_accessible(struct folio *folio) * convert_to_secure. * As secure pages are never large folios, both variants can co-exists. */ - if (!test_bit(PG_arch_1, &folio->flags)) + if (!test_bit(PG_arch_1, &folio->flags.f)) return 0; rc = uv_pin_shared(folio_to_phys(folio)); if (!rc) { - clear_bit(PG_arch_1, &folio->flags); + clear_bit(PG_arch_1, &folio->flags.f); return 0; } rc = uv_convert_from_secure(folio_to_phys(folio)); if (!rc) { - clear_bit(PG_arch_1, &folio->flags); + clear_bit(PG_arch_1, &folio->flags.f); return 0; } @@ -894,7 +841,12 @@ out_kobj: device_initcall(uv_sysfs_init); /* - * Find the secret with the secret_id in the provided list. + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. * * Context: might sleep. */ @@ -913,14 +865,17 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } -/* - * Do the actual search for `uv_get_secret_metadata`. +/** + * uv_find_secret() - search secret metadata for a given secret id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. * * Context: might sleep. */ -static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list *list, - struct uv_secret_list_item_hdr *secret) +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) { u16 start_idx = 0; u16 list_rc; @@ -942,36 +897,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } - -/** - * uv_get_secret_metadata() - get secret metadata for a given secret id. - * @secret_id: search pattern. - * @secret: output data, containing the secret's metadata. - * - * Search for a secret with the given secret_id in the Ultravisor secret store. - * - * Context: might sleep. - * - * Return: - * * %0: - Found entry; secret->idx and secret->type are valid. - * * %ENOENT - No entry found. 
- * * %ENODEV: - Not supported: UV not available or command not available. - * * %EIO: - Other unexpected UV error. - */ -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret) -{ - struct uv_secret_list *buf; - int rc; - - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = find_secret(secret_id, buf, secret); - kfree(buf); - return rc; -} -EXPORT_SYMBOL_GPL(uv_get_secret_metadata); +EXPORT_SYMBOL_GPL(uv_find_secret); /** * uv_retrieve_secret() - get the secret value for the secret index. diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..a27a90a199be 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -7,7 +7,6 @@ */ #include <linux/binfmts.h> -#include <linux/compat.h> #include <linux/elf.h> #include <linux/errno.h> #include <linux/init.h> @@ -16,94 +15,14 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> #include <asm/vdso.h> -extern char vdso64_start[], vdso64_end[]; -extern char vdso32_start[], vdso32_end[]; - -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). 
- */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} +extern char vdso_start[], vdso_end[]; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) @@ -112,17 +31,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - -static struct vm_special_mapping vdso64_mapping = { - .name = "[vdso]", - .mremap = vdso_mremap, -}; - -static struct vm_special_mapping vdso32_mapping = { +static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", .mremap = vdso_mremap, }; @@ -137,39 +46,29 @@ early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) { unsigned long vvar_start, vdso_text_start, vdso_text_len; - struct vm_special_mapping *vdso_mapping; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; - if (is_compat_task()) { - vdso_text_len = vdso32_end - vdso32_start; - vdso_mapping = &vdso32_mapping; - } else { - vdso_text_len = vdso64_end - vdso64_start; - vdso_mapping = &vdso64_mapping; - } + vdso_text_len = vdso_end - vdso_start; vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0); rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_mapping); + &vdso_mapping); if (IS_ERR(vma)) { do_munmap(mm, vvar_start, PAGE_SIZE, NULL); rc = PTR_ERR(vma); @@ -209,18 +108,12 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len) unsigned long vdso_text_size(void) { - unsigned long size; - - if (is_compat_task()) - size = vdso32_end - vdso32_start; - else - size = vdso64_end - vdso64_start; - return PAGE_ALIGN(size); + return PAGE_ALIGN(vdso_end - vdso_start); } unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -253,7 +146,7 @@ static void vdso_apply_alternatives(void) struct alt_instr *start, *end; const struct elf64_hdr *hdr; - hdr = (struct elf64_hdr *)vdso64_start; + hdr = (struct elf64_hdr *)vdso_start; shdr = (void *)hdr + hdr->e_shoff; alt = find_section(hdr, shdr, ".altinstructions"); if (!alt) @@ -266,9 +159,7 @@ static void vdso_apply_alternatives(void) static int __init vdso_init(void) { vdso_apply_alternatives(); - vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end); - if (IS_ENABLED(CONFIG_COMPAT)) - vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end); + vdso_mapping.pages = vdso_setup_pages(vdso_start, vdso_end); return 0; } 
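With the compat variant gone, every process gets a single [vdso] text mapping plus the generic vvar data pages installed by vdso_install_vvar_mapping(). How userspace consumes the mapping is unchanged; a small illustrative program, using only the standard auxiliary-vector API and not taken from this patch, that locates the mapped vDSO:

	#include <elf.h>
	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* The kernel publishes the address of the vDSO ELF header
		 * in the auxiliary vector. */
		unsigned long ehdr = getauxval(AT_SYSINFO_EHDR);

		if (!ehdr) {
			puts("no vDSO mapped");
			return 1;
		}
		printf("vDSO ELF header at %#lx\n", ehdr);
		return 0;
	}
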
arch_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso/.gitignore index 5167384843b9..652e31d82582 100644 --- a/arch/s390/kernel/vdso32/.gitignore +++ b/arch/s390/kernel/vdso/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -vdso32.lds +vdso.lds diff --git a/arch/s390/kernel/vdso/Makefile b/arch/s390/kernel/vdso/Makefile new file mode 100644 index 000000000000..2fa12d4ac106 --- /dev/null +++ b/arch/s390/kernel/vdso/Makefile @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: GPL-2.0 +# List of files in the vdso + +# Include the generic Makefile to check the built vdso. +include $(srctree)/lib/vdso/Makefile.include +obj-vdso = vdso_user_wrapper.o note.o vgetrandom-chacha.o +obj-cvdso = vdso_generic.o getcpu.o vgetrandom.o +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) +CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vdso_generic.o = $(VDSO_CFLAGS_REMOVE) + +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif + +# Build rules + +targets := $(obj-vdso) $(obj-cvdso) vdso.so vdso.so.dbg +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) +obj-cvdso := $(addprefix $(obj)/, $(obj-cvdso)) + +KBUILD_AFLAGS_VDSO := $(KBUILD_AFLAGS) -DBUILD_VDSO + +KBUILD_CFLAGS_VDSO := $(KBUILD_CFLAGS) -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS_VDSO := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO += -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables +KBUILD_CFLAGS_VDSO += -fno-stack-protector +ldflags-y := -shared -soname=linux-vdso.so.1 \ + --hash-style=both --build-id=sha1 -T + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_VDSO) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_VDSO) + +obj-y += vdso_wrapper.o +targets += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso_wrapper.o : $(obj)/vdso.so + +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) + +# link rule for the .so file, .lds has to be first +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) $(obj-cvdso) FORCE + $(call if_changed,vdso_and_check) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso): %.o: %.S FORCE + $(call if_changed_dep,vdsoas) + +$(obj-cvdso): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) + +# actual build commands +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdsocc = VDSOC $@ + cmd_vdsocc = $(CC) $(c_flags) -c -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso/gen_vdso_offsets.sh index 37f05cb38dad..359982fb002d 100755 --- a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh +++ b/arch/s390/kernel/vdso/gen_vdso_offsets.sh @@ -12,4 +12,4 @@ # LC_ALL=C -sed -n 's/\([0-9a-f]*\) . 
__kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p' +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso/getcpu.c index 5c5d4a848b76..5c5d4a848b76 100644 --- a/arch/s390/kernel/vdso64/getcpu.c +++ b/arch/s390/kernel/vdso/getcpu.c diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso/note.S index db19d0680a0a..db19d0680a0a 100644 --- a/arch/s390/kernel/vdso32/note.S +++ b/arch/s390/kernel/vdso/note.S diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso/vdso.h index 9e5397e7b590..8cff033dd854 100644 --- a/arch/s390/kernel/vdso64/vdso.h +++ b/arch/s390/kernel/vdso/vdso.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H -#define __ARCH_S390_KERNEL_VDSO64_VDSO_H +#ifndef __ARCH_S390_KERNEL_VDSO_VDSO_H +#define __ARCH_S390_KERNEL_VDSO_VDSO_H #include <vdso/datapage.h> @@ -12,4 +12,4 @@ int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); -#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ +#endif /* __ARCH_S390_KERNEL_VDSO_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso/vdso.lds.S index ec42b7d9cb53..7bec4de0e8e0 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso/vdso.lds.S @@ -7,17 +7,16 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -61,47 +60,9 @@ SECTIONS _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } + STABS_DEBUG + DWARF_DEBUG .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. 
- */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } /DISCARD/ : { diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso/vdso_generic.c index a9aa75643c08..a9aa75643c08 100644 --- a/arch/s390/kernel/vdso64/vdso64_generic.c +++ b/arch/s390/kernel/vdso/vdso_generic.c diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso/vdso_user_wrapper.S index aa06c85bcbd3..aa06c85bcbd3 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso/vdso_user_wrapper.S diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso/vdso_wrapper.S index de2fb930471a..f69e62a14978 100644 --- a/arch/s390/kernel/vdso32/vdso32_wrapper.S +++ b/arch/s390/kernel/vdso/vdso_wrapper.S @@ -5,11 +5,11 @@ __PAGE_ALIGNED_DATA - .globl vdso32_start, vdso32_end + .globl vdso_start, vdso_end .balign PAGE_SIZE -vdso32_start: - .incbin "arch/s390/kernel/vdso32/vdso32.so" +vdso_start: + .incbin "arch/s390/kernel/vdso/vdso.so" .balign PAGE_SIZE -vdso32_end: +vdso_end: .previous diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso/vgetrandom-chacha.S index 09c034c2f853..09c034c2f853 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso/vgetrandom-chacha.S diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso/vgetrandom.c index b5268b507fb5..b5268b507fb5 100644 --- a/arch/s390/kernel/vdso64/vgetrandom.c +++ b/arch/s390/kernel/vdso/vgetrandom.c diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile deleted file mode 100644 index 2c5afb88d298..000000000000 --- a/arch/s390/kernel/vdso32/Makefile +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso - -# Include the generic Makefile to check the built vdso. 
-include $(srctree)/lib/vdso/Makefile -obj-vdso32 = vdso_user_wrapper-32.o note-32.o - -# Build rules - -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING - -KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_32 += -m31 -s - -KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables - -LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \ - --hash-style=both --build-id=sha1 -melf_s390 -T - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) - -obj-y += vdso32_wrapper.o -targets += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) - -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so - -quiet_cmd_vdso_and_check = VDSO $@ - cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) - -$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso_and_check) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj-vdso32): %-32.o: %.S FORCE - $(call if_changed_dep,vdso32as) - -# actual build commands -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< -quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $< - -# Generate VDSO offsets using helper script -gen-vdsosym := $(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh deleted file mode 100755 index 9c4f951e227d..000000000000 --- a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# -# Match symbols in the DSO that look like VDSO_*; produce a header file -# of constant offsets into the shared object. -# -# Doing this inside the Makefile will break the $(filter-out) function, -# causing Kbuild to rebuild the vdso-offsets header file every time. -# -# Inspired by arm64 version. -# - -LC_ALL=C -sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S deleted file mode 100644 index c916c4f73f76..000000000000 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ /dev/null @@ -1,141 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is the infamous ld script for the 64 bits vdso - * library - */ - -#include <asm/page.h> -#include <asm/vdso.h> - -OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390:31-bit) - -SECTIONS -{ - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif - . 
= SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) - } :text - PROVIDE(__etext = .); - PROVIDE(_etext = .); - PROVIDE(etext = .); - - /* - * Other stuff is appended to the text segment: - */ - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - - .dynamic : { *(.dynamic) } :text :dynamic - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } - - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .got ALIGN(8) : { *(.got .toc) } - .got.plt ALIGN(8) : { *(.got.plt) } - - _end = .; - PROVIDE(end = .); - - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - - /DISCARD/ : { - *(.note.GNU-stack) - *(.branch_lt) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * Very old versions of ld do not recognize this name token; use the constant. - */ -#define PT_GNU_EH_FRAME 0x6474e550 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. 
- */
-VERSION
-{
-	VDSO_VERSION_STRING {
-	global:
-		/*
-		 * Has to be there for the kernel to find
-		 */
-		__kernel_compat_restart_syscall;
-		__kernel_compat_rt_sigreturn;
-		__kernel_compat_sigreturn;
-	local: *;
-	};
-}
diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
deleted file mode 100644
index 2e645003fdaf..000000000000
--- a/arch/s390/kernel/vdso32/vdso_user_wrapper.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
-.macro vdso_syscall func,syscall
-	.globl __kernel_compat_\func
-	.type __kernel_compat_\func,@function
-	__ALIGN
-__kernel_compat_\func:
-	CFI_STARTPROC
-	svc	\syscall
-	/* Make sure we notice when a syscall returns, which shouldn't happen */
-	.word	0
-	CFI_ENDPROC
-	.size __kernel_compat_\func,.-__kernel_compat_\func
-.endm
-
-vdso_syscall restart_syscall,__NR_restart_syscall
-vdso_syscall sigreturn,__NR_sigreturn
-vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
deleted file mode 100644
index 4ec80685fecc..000000000000
--- a/arch/s390/kernel/vdso64/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
deleted file mode 100644
index 37bb4b761229..000000000000
--- a/arch/s390/kernel/vdso64/Makefile
+++ /dev/null
@@ -1,79 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso
-
-# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
-obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
-VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
-CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
-CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
-CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
-
-ifneq ($(c-getrandom-y),)
-	CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
-endif
-
-# Build rules
-
-targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
-obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
-obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
-
-KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_64 += -m64
-
-KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
-ldflags-y := -shared -soname=linux-vdso64.so.1 \
-	     --hash-style=both --build-id=sha1 -T
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
-
-obj-y += vdso64_wrapper.o
-targets += vdso64.lds
-CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-
-# Force dependency (incbin is bad)
-$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
-
-quiet_cmd_vdso_and_check = VDSO    $@
-      cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
-	$(call if_changed,vdso_and_check)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S FORCE
-	$(call if_changed_dep,vdso64as)
-
-$(obj-cvdso64): %.o: %.c FORCE
-	$(call if_changed_dep,vdso64cc)
-
-# actual build commands
-quiet_cmd_vdso64as = VDSO64A $@
-      cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
-quiet_cmd_vdso64cc = VDSO64C $@
-      cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
-
-# Generate VDSO offsets using helper script
-gen-vdsosym := $(src)/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
-	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-
-include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
-	$(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
deleted file mode 100644
index db19d0680a0a..000000000000
--- a/arch/s390/kernel/vdso64/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-	.long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
deleted file mode 100644
index 672184998623..000000000000
--- a/arch/s390/kernel/vdso64/vdso64_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-	__PAGE_ALIGNED_DATA
-
-	.globl vdso64_start, vdso64_end
-	.balign PAGE_SIZE
-vdso64_start:
-	.incbin "arch/s390/kernel/vdso64/vdso64.so"
-	.balign PAGE_SIZE
-vdso64_end:
-
-	.previous
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
index 23f7d7619a99..cc8933e04ff7 100644
--- a/arch/s390/kernel/vmcore_info.c
+++ b/arch/s390/kernel/vmcore_info.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/vmcore_info.h>
-#include <asm/abs_lowcore.h>
 #include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
 #include <asm/setup.h>
 
 void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 377b9aaf8c92..53bcbb91bb9b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -51,8 +51,7 @@ SECTIONS
 		IRQENTRY_TEXT
 		SOFTIRQENTRY_TEXT
 		FTRACE_HOTPATCH_TRAMPOLINES_TEXT
-		*(.text.*_indirect_*)
-		*(.fixup)
+		*(.text..*_indirect_*)
 		*(.gnu.warning)
 		. = ALIGN(PAGE_SIZE);
 		_etext = .;		/* End of text section */
@@ -72,6 +71,13 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;
 
+	. = ALIGN(8);
+	.skey_region_table : {
+		__skey_region_start = .;
+		KEEP(*(.skey_region))
+		__skey_region_end = .;
+	}
+
 	.data.rel.ro : {
 		*(.data.rel.ro .data.rel.ro.*)
 	}
@@ -144,6 +150,15 @@ SECTIONS
 		*(.altinstr_replacement)
 	}
 
+#ifdef CONFIG_STACKPROTECTOR
+	. = ALIGN(8);
+	.stack_prot_table : {
+		__stack_prot_start = .;
+		KEEP(*(__stack_protector_loc))
+		__stack_prot_end = .;
+	}
+#endif
+
 	/*
	 * Table with the patch locations to undo expolines
	 */
@@ -203,6 +218,33 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	_end = . ;
 
+	/* Debugging sections.	*/
+	STABS_DEBUG
+	DWARF_DEBUG
+	ELF_DETAILS
+
+	/*
+	 * Make sure that the .got.plt is either completely empty or it
+	 * contains only the three reserved double words.
+	 */
+	.got.plt : {
+		*(.got.plt)
+	}
+	ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+	/*
+	 * Sections that should stay zero sized, which is safer to
+	 * explicitly check instead of blindly discarding.
+	 */
+	.plt : {
+		*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+	}
+	ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+	.rela.dyn : {
+		*(.rela.*) *(.rela_*)
+	}
+	ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
 	/*
 	 * uncompressed image info used by the decompressor
 	 * it should match struct vmlinux_info
 	 */
@@ -224,6 +266,10 @@ SECTIONS
 		QUAD(invalid_pg_dir)
 		QUAD(__alt_instructions)
 		QUAD(__alt_instructions_end)
+#ifdef CONFIG_STACKPROTECTOR
+		QUAD(__stack_prot_start)
+		QUAD(__stack_prot_end)
+#endif
 #ifdef CONFIG_KASAN
 		QUAD(kasan_early_shadow_page)
 		QUAD(kasan_early_shadow_pte)
@@ -233,33 +279,6 @@ SECTIONS
 #endif
 	} :NONE
 
-	/* Debugging sections.	*/
-	STABS_DEBUG
-	DWARF_DEBUG
-	ELF_DETAILS
-
-	/*
-	 * Make sure that the .got.plt is either completely empty or it
-	 * contains only the three reserved double words.
-	 */
-	.got.plt : {
-		*(.got.plt)
-	}
-	ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
-
-	/*
-	 * Sections that should stay zero sized, which is safer to
-	 * explicitly check instead of blindly discarding.
-	 */
-	.plt : {
-		*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
-	}
-	ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
-	.rela.dyn : {
-		*(.rela.*) *(.rela_*)
-	}
-	ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
-
 	/* Sections to be discarded */
 	DISCARDS
 	/DISCARD/ : {

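For context on the vdso offset machinery removed above: the cmd_vdsosym rule piped $(NM) output for the debug vdso through gen_vdso_offsets.sh, whose sed one-liner turns each exported __kernel_compat_* symbol into a #define. A minimal sketch of that pipeline, run by hand from an s390 build tree; the addresses shown are invented for illustration:

	$ nm arch/s390/kernel/vdso32/vdso32.so.dbg | \
		arch/s390/kernel/vdso32/gen_vdso_offsets.sh | LC_ALL=C sort
	#define vdso32_offset_restart_syscall	0x00000460
	#define vdso32_offset_rt_sigreturn	0x000004c0
	#define vdso32_offset_sigreturn	0x000004a0

The sorted output landed in include/generated/vdso32-offsets.h, so kernel code could compute a user-space trampoline address as the process's vdso base plus, e.g., vdso32_offset_sigreturn.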